source: trunk/plugins/MootoolsFileManager/mootools-filemanager/Assets/Connector/ID3.class.php

Last change on this file was 1321, checked in by gogo, 7 years ago

Merge the MootoolsFileManager?-Update branch into the trunk.

File size: 18.5 KB
Line 
<?php

/**
 *  Obtain id3 information from mp3 files.
 *
 *  @author James Sleeman (james@gogo.co.nz)
 *  @website http://www.gogo.co.nz
 *  @license MIT  (http://en.wikipedia.org/wiki/MIT_License)
 *
 *  With thanks for inspiration and a small amount of code to:
 *  Author    : de77
 *  Website   : www.de77.com
 *  Class desc  : http://de77.com/php/php-class-how-to-read-id3v2-tags-from-mp3-files
 *  Class desc  : http://de77.com/php/php-class-how-to-read-id3-v1-tag-from-mp3-files
 *
 *  Acknowledgement to: http://www.autistici.org/ermes/index.php?pag=1&post=15
 *  which I was going to use, but looked to be a bit fragile
 */
19 
class id3Parser
{
  /** Message describing the most recent failure recorded by this parser (unset until one occurs). */
  public $error;

  /** Frames whose payload is larger than this many bytes are skipped instead of being read. */
  private $max_frame_size = 1048576;

  /** Map of frame identifier => friendly key that is added to the result alongside the raw identifier. */
  private $tags = array(
    // V2.3/4
    'TALB' => 'album',
    'TCON' => 'genre',
    'TENC' => 'encoder',
    'TIT2' => 'title',
    'TPE1' => 'artist',
    'TPE2' => 'ensemble',
    'TYER' => 'year',
    'TCOM' => 'composer',
    'TCOP' => 'copyright',
    'TRCK' => 'track',
    'WXXX' => 'url',
    'COMM' => 'comment',

    // V2.2
    'TAL' => 'album',
    'TCO' => 'genre',
    'TEN' => 'encoder',
    'TT2' => 'title',
    'TP1' => 'artist',
    'TP2' => 'ensemble',
    'TYE' => 'year',
    'TCM' => 'composer',
    'TCR' => 'copyright',
    'TRK' => 'track',
    'WXX' => 'url',
    'COM' => 'comment'
  );

  // ID3v1 Genre Mapping (array index == genre number)
  private $genres = array(
    'Blues',
    'Classic Rock',
    'Country',
    'Dance',
    'Disco',
    'Funk',
    'Grunge',
    'Hip-Hop',
    'Jazz',
    'Metal',
    'New Age',
    'Oldies',
    'Other',
    'Pop',
    'R&B',
    'Rap',
    'Reggae',
    'Rock',
    'Techno',
    'Industrial',
    'Alternative',
    'Ska',
    'Death Metal',
    'Pranks',
    'Soundtrack',
    'Euro-Techno',
    'Ambient',
    'Trip-Hop',
    'Vocal',
    'Jazz+Funk',
    'Fusion',
    'Trance',
    'Classical',
    'Instrumental',
    'Acid',
    'House',
    'Game',
    'Sound Clip',
    'Gospel',
    'Noise',
    'AlternRock',
    'Bass',
    'Soul',
    'Punk',
    'Space',
    'Meditative',
    'Instrumental Pop',
    'Instrumental Rock',
    'Ethnic',
    'Gothic',
    'Darkwave',
    'Techno-Industrial',
    'Electronic',
    'Pop-Folk',
    'Eurodance',
    'Dream',
    'Southern Rock',
    'Comedy',
    'Cult',
    'Gangsta',
    'Top 40',
    'Christian Rap',
    'Pop/Funk',
    'Jungle',
    'Native American',
    'Cabaret',
    'New Wave',
    'Psychadelic',
    'Rave',
    'Showtunes',
    'Trailer',
    'Lo-Fi',
    'Tribal',
    'Acid Punk',
    'Acid Jazz',
    'Polka',
    'Retro',
    'Musical',
    'Rock & Roll',
    'Hard Rock',
    'Folk',
    'Folk-Rock',
    'National Folk',
    'Swing',
    'Fast Fusion',
    'Bebob',
    'Latin',
    'Revival',
    'Celtic',
    'Bluegrass',
    'Avantgarde',
    'Gothic Rock',
    'Progressive Rock',
    'Psychedelic Rock',
    'Symphonic Rock',
    'Slow Rock',
    'Big Band',
    'Chorus',
    'Easy Listening',
    'Acoustic',
    'Humour',
    'Speech',
    'Chanson',
    'Opera',
    'Chamber Music',
    'Sonata',
    'Symphony',
    'Booty Bass',
    'Primus',
    'Porn Groove',
    'Satire',
    'Slow Jam',
    'Club',
    'Tango',
    'Samba',
    'Folklore',
    'Ballad',
    'Power Ballad',
    'Rhythmic Soul',
    'Freestyle',
    'Duet',
    'Punk Rock',
    'Drum Solo',
    'Acapella',
    'Euro-House',
    'Dance Hall'
    );

  /** Given a path to an mp3 file, interrogate the file to find any id3 tags in it.
   *
   *  Looks, in order, for an ID3v2 header at the start of the file, an ID3v2 footer in the
   *  last 10 bytes, and an ID3v1 tag in the last 128 bytes; the first one that yields any
   *  property wins.  If nothing is found (or the file cannot be opened) a title is derived
   *  from the file name and 'id3' is set to '0'.
   *
   *  Can handle id3v1, v2.2, v2.3 and v2.4, however it is not a complete parser, just good-enough;
   *  compressed and encrypted frames are skipped, only (T)ext and (W)ww frames are returned.
   *
   *  @param  string $file path of the file to inspect
   *  @return array  associative array of Property => Value, always containing 'id3'
   */

  public function read($file)
  {
    $result = array();

    // 'b' so that no platform applies text-mode translation to the binary tag data
    $f = fopen($file, 'rb');
    if($f === false)
    {
      $this->error = 'Unable to open file: ' . $file;
    }
    else
    {
      if(fread($f, 3) === 'ID3')
      {
        // ID3v2 tag at start of file, use that
        rewind($f);
        $result = $this->read_v2($f);
      }

      // The fseek() checks keep us from reading at a stale position when the file is too short
      if(!count($result) && fseek($f, -10, SEEK_END) === 0 && fread($f, 3) === '3DI')
      {
        // Looking at a footer of an ID3v2, read_v2 finds the length and seeks backwards to the start
        fseek($f, -10, SEEK_END);
        $result = $this->read_v2($f);
      }

      if(!count($result) && fseek($f, -128, SEEK_END) === 0 && fread($f, 3) === 'TAG')
      {
        // v1 tag (read_v1 positions the handle itself)
        $v1 = $this->read_v1($f);
        if(is_array($v1))
        {
          $result = $v1;
        }
      }

      fclose($f);
    }

    if(!count($result))
    {
      // Still nothing, let's make a title anyway
      $result['title'] = trim(preg_replace('/(\.mp3|%20|[_+ -]|(^[0-9]+\.?))+/i', ' ', basename($file)));
      $result['id3']   = '0';
    }

    return $result;
  }

  /** Decode the value of a text frame, returns in UTF-8 always.
   *
   *  The first byte of the payload selects the encoding (0 = ISO-8859-1, 1 = UTF-16 with BOM,
   *  2 = UTF-16BE without BOM, 3 = UTF-8); the remaining bytes are the text.  Trailing NUL
   *  terminators are removed from the decoded text.
   *
   *  @param  string $tag raw frame payload including the leading encoding byte
   *  @return string|false decoded text, or false for an empty payload, an unknown
   *                       encoding byte or a failed conversion
   */

  private function decode_v23_text_value($tag)
  {
    if(!is_string($tag) || $tag === '')
    {
      return false;
    }

    // substr() can return false on older PHP when nothing follows the encoding byte
    $text = (string) substr($tag, 1);

    //mb_convert_encoding is corrupted in some versions of PHP so I use iconv
    switch(ord($tag[0]))
    {
      case 0: //ISO-8859-1
        // @ keeps this best-effort: a failed conversion is reported through the false return
        $decoded = @iconv('ISO-8859-1', 'UTF-8', $text);
        break;

      case 1: //UTF-16 with BOM, the BOM decides the byte order
        $charset = 'UTF-16LE';
        $bom     = (string) substr($text, 0, 2);
        if($bom === "\xFF\xFE")
        {
          $text = (string) substr($text, 2);
        }
        elseif($bom === "\xFE\xFF")
        {
          $charset = 'UTF-16BE';
          $text    = (string) substr($text, 2);
        }
        // No BOM at all: keep every byte and assume little endian
        $decoded = $this->decode_utf16($text, $charset);
        break;

      case 2: //UTF-16BE, no BOM so the text starts directly after the encoding byte
        $decoded = $this->decode_utf16($text, 'UTF-16BE');
        break;

      case 3: //UTF-8
        $decoded = $text;
        break;

      default:
        return false;
    }

    if($decoded === false)
    {
      return false;
    }

    return rtrim($decoded, "\0");
  }

  /** Convert UTF-16 bytes (without BOM) to UTF-8.
   *
   *  @param  string $bytes   raw UTF-16 code units
   *  @param  string $charset 'UTF-16LE' or 'UTF-16BE'
   *  @return string|false UTF-8 text, or false if iconv could not convert it
   */

  private function decode_utf16($bytes, $charset)
  {
    if(strlen($bytes) % 2)
    {
      if($charset === 'UTF-16LE')
      {
        // A little endian string that lost the NUL high byte of its last character: put it back
        $bytes .= "\0";
      }
      else
      {
        // A dangling byte cannot start a big endian character, drop it
        $bytes = (string) substr($bytes, 0, -1);
      }
    }

    if($bytes === '')
    {
      return '';
    }

    $utf8 = @iconv($charset, 'UTF-8', $bytes);
    if($utf8 === false)
    {
      // Second chance: ask iconv to discard what it cannot convert (not every iconv supports this)
      $utf8 = @iconv($charset, 'UTF-8//IGNORE', $bytes);
    }

    return $utf8;
  }

  /** Some size fields in 2.3+ headers are "sync safe": each of the 4 bytes only carries 7 bits.
   *  Strip the top bit of every byte and rebuild the 28 bit size integer.
   *
   *  @param  int $headersize the 4 size bytes read as one big endian integer
   *  @return int the decoded size
   */

  private function desync_size($headersize)
  {
    $size = 0;
    for($byte = 0; $byte < 4; $byte++)
    {
      // take the low 7 bits of byte N (counted from the least significant) and pack them at bit 7*N
      $size |= (($headersize >> (8 * $byte)) & 0x7F) << (7 * $byte);
    }
    return $size;
  }

  /** Read a specified number of bytes from the stream counted AFTER re-synchonisation (if necessary).
   *  The spec isn't very clear, but I believe that in a 2.2/3 if the unsynchronised flag is on,
   *  then the frame headers are unsynchronised also, which means if they happen to include an FF00
   *  simply reading 10 raw bytes would not get a proper frame header.
   *  We have to unsynchronise as we go and maybe read more bytes.
   *
   *  Leading NUL bytes are treated as padding and discarded, so fewer than $num bytes are
   *  returned when the tag (or the file) runs out first.
   *
   *  @param  resource $f                 file handle positioned at the bytes to read
   *  @param  int      $num               number of bytes wanted after re-synchronisation
   *  @param  mixed    $IsUnsynchronised  truthy when FF00 must be collapsed to FF
   *  @param  int      $LeftToRead        bytes remaining in the tag, decremented by what is requested
   *  @return string   up to $num bytes
   */

  private function fread_id3_synchronised_length($f, $num, $IsUnsynchronised, &$LeftToRead)
  {
    $frame = '';

    // "> 0" rather than plain truthiness so a negative remainder also ends the loop
    while((strlen($frame) < $num) && ($LeftToRead > 0) && !feof($f))
    {
      $wanted = $num - strlen($frame);
      $chunk  = fread($f, $wanted);
      if($chunk === false || $chunk === '')
      {
        break; // nothing more can be read, do not spin
      }

      $LeftToRead -= $wanted;
      $frame      .= $chunk;

      if($IsUnsynchronised)
      {
        $frame = str_replace("\xFF\x00", "\xFF", $frame);
      }

      // We have picked up NUL padding? Drop it and top the buffer up on the next pass.
      $frame = ltrim($frame, "\0");
    }

    return $frame;
  }

  /** Record one decoded frame in the result array.
   *
   *  Only frames whose identifier starts with T (text, decoded to UTF-8) or W (url, stored raw)
   *  are kept.  If the identifier has a friendly name in id3Parser::$tags that key is set as
   *  well, as a reference to the same value.
   *
   *  @param array  $result    result array being built
   *  @param string $id        frame identifier, e.g. 'TIT2' or 'TT2'
   *  @param string $value     raw (already re-synchronised) frame payload
   *  @param string $genre_id  identifier of the genre frame for this tag version ('TCON' / 'TCO')
   */

  private function store_frame(&$result, $id, $value, $genre_id)
  {
    if(!is_string($id) || $id === '')
    {
      return;
    }

    switch($id[0])
    {
      case 'T':
        $value = $this->decode_v23_text_value($value);
        // The old id3v1 genre can be included in this textual information, either as "(17)"
        // or, as written by v2.4 taggers, as the bare number "17"
        if($id == $genre_id && is_string($value)
        && (preg_match('/\(([0-9]+)\)/', $value, $M) || preg_match('/^([0-9]+)$/', $value, $M)))
        {
          if(isset($this->genres[(int) $M[1]]))
          {
            $value = $this->genres[(int) $M[1]];
          }
        }
        break;

      case 'W':
        break;

      default:
        return; // not a frame type we handle
    }

    $result[$id] = $value;
    if(isset($this->tags[$id]))
    {
      $result[$this->tags[$id]] =& $result[$id];
    }
  }

  /** Given a file handle seeked to the first byte of an id3v2.X header (or footer),
   *  return an array of Property => Value for the id3 properties we can handle (currently T and W prefixes)
   *  if a property has a given name in id3Parser::$tags, then this will be set also (as a reference).
   *
   *  @param  resource $f file handle
   *  @return array    properties found plus 'id3' => '2.<major>.<minor>', or an empty array
   */

  private function read_v2($f)
  {
    $raw = fread($f, 10);
    if($raw === false || strlen($raw) < 10)
    {
      return array(); // truncated header
    }

    $header = unpack("a3signature/C1version_major/C1version_minor/C1flags/Nsize", $raw);
    $header['size'] = $this->desync_size($header['size']);

    if($header['signature'] == '3DI')
    {
      // This is a footer for a v4, seek up to the start of the data after the header
      // We don't need to read the header, it's the same as the footer
      if(fseek($f, 0-$header['size']-10, SEEK_CUR) !== 0)
      {
        return array(); // the size points before the start of the file
      }
    }

    // version_major / version_minor are already integers (unpack 'C'), no conversion needed
    switch($header['version_major'])
    {
      case 4:
        $result = $this->read_v24($f, $header);
        break;

      case 3:
        $result = $this->read_v23($f, $header);
        break;

      case 2:
        $result = $this->read_v22($f, $header);
        break;

      default:
        $result = array();
        break;
    }

    if(count($result))
    {
      $result['id3'] = '2.'.$header['version_major'].'.'.$header['version_minor'];
    }

    return $result;
  }

  /** Given a file handle seeked to the first byte after a v2.2 header, and the header decoded in an array,
   *  return an array of Property => Value for the id3 properties we can handle (currently T and W prefixes).
   *  v2.2 frames have a 6 byte header: a 3 character identifier and a plain 3 byte big endian size.
   *
   *  @param  resource $f
   *  @param  array    $header decoded tag header ('flags' and 'size' are used)
   *  @return array
   */

  private function read_v22($f, $header)
  {
    $LeftToRead        = $header['size'];
    $IsUnsynchronised  = $header['flags'] & (1<<7);
    $IsCompressed      = $header['flags'] & (1<<6);

    if($IsCompressed) { return array(); }

    // At this point we should be looking at a frame header on the stream
    $result = array();
    while(($LeftToRead > 6) && !feof($f))
    {
      $raw = fread($f, 6);
      if($raw === false || strlen($raw) < 6) break; // Bad frame
      $LeftToRead -= 6;

      $frame = unpack('a3id/C3size', $raw);
      // 24 bit integer, NOT sync safe in v2.2
      $size  = ($frame['size1']<<16)|($frame['size2']<<8)|($frame['size3']);

      if($size == 0) break; // We are now into padding area.
      if($size > $this->max_frame_size)
      {
        fseek($f, $size, SEEK_CUR);
        $LeftToRead -= $size;
        continue;
      }

      // Read the value of the frame
      $value = fread($f, $size);
      $LeftToRead -= $size;

      if($IsUnsynchronised)
      {
        $value = str_replace("\xFF\x00", "\xFF", (string) $value);
      }

      $this->store_frame($result, $frame['id'], (string) $value, 'TCO');
    }

    return $result;
  }

  /** Given a file handle seeked to the first byte after a v2.4 header, and the header decoded in an array,
   *  return an array of Property => Value for the id3 properties we can handle (currently T and W prefixes)
   *  if a property has a given name in id3Parser::$tags, then this will be set also (as a reference).
   *
   *  @param  resource $f
   *  @param  array    $header decoded tag header ('flags' and 'size' are used)
   *  @return array
   */

  private function read_v24($f, $header)
  {
    $IsUnsynchronised  = $header['flags'] & (1<<7);
    $HasExtendedHeader = $header['flags'] & (1<<6);

    $LeftToRead = $header['size'];
    if($HasExtendedHeader)
    {
      $raw = fread($f, 4);
      if($raw === false || strlen($raw) < 4)
      {
        return array();
      }

      $exHeader = unpack('Nsize', $raw);
      $exSize   = $this->desync_size($exHeader['size']);

      // Don't care about the rest of it, we are just getting rid of it.
      // In v2.4 the size counts the 4 size bytes we have already consumed.
      if($exSize > 4)
      {
        fseek($f, $exSize - 4, SEEK_CUR);
      }
      $LeftToRead -= max($exSize, 4);
    }

    // At this point we should be looking at a frame header on the stream

    $result = array();
    while(($LeftToRead > 10) && !feof($f))
    {
      $raw = $this->fread_id3_synchronised_length($f, 10, $IsUnsynchronised, $LeftToRead);
      if(strlen($raw) < 10) break; // Bad frame

      $frame = unpack('a4id/Nsize/C2flags', $raw);
      $size  = $this->desync_size($frame['size']);

      if($size == 0) break; // We are now into padding area.

      // v2.4 format flags byte is %0h00kmnp
      $format = $frame['flags2'];

      if( $format & 0x08 // Compressed
      ||  $format & 0x04 // Encrypted
      )
      {
        // Can't work with these
        fseek($f, $size, SEEK_CUR);
        $LeftToRead -= $size;
        continue;
      }

      if($format & 0x40) // Grouping identity
      {
        fread($f, 1); // Get rid of the group byte
        $LeftToRead -= 1;
        $size--;      // it is included in the frame size
      }

      if($format & 0x01) // Data length indicator
      {
        fread($f, 4); // Get rid of the data length
        $LeftToRead -= 4;
        $size -= 4;   // it is included in the frame size
      }

      if($size <= 0) continue; // nothing left once the extra header bytes are taken off

      if($size > $this->max_frame_size)
      {
        fseek($f, $size, SEEK_CUR);
        $LeftToRead -= $size;
        continue;
      }

      // Read the value of the frame
      $value = fread($f, $size);
      $LeftToRead -= $size;

      // Unsynchronisation can be flagged for the whole tag or for this frame only
      if($IsUnsynchronised || ($format & 0x02))
      {
        $value = str_replace("\xFF\x00", "\xFF", (string) $value);
      }

      $this->store_frame($result, $frame['id'], (string) $value, 'TCON');
    }

    return $result;
  }

  /** Given a file handle seeked to the first byte after a v2.3 header, and the header decoded in an array,
   *  return an array of Property => Value for the id3 properties we can handle (currently T and W prefixes)
   *  if a property has a given name in id3Parser::$tags, then this will be set also (as a reference).
   *
   *  @param  resource $f
   *  @param  array    $header decoded tag header ('flags' and 'size' are used)
   *  @return array
   */

  private function read_v23($f, $header)
  {
    $IsUnsynchronised  = $header['flags'] & (1<<7);
    $HasExtendedHeader = $header['flags'] & (1<<6);

    $LeftToRead = $header['size'];
    if($HasExtendedHeader)
    {
      $raw = fread($f, 4);
      if($raw === false || strlen($raw) < 4)
      {
        return array();
      }

      $exHeader = unpack('Nsize', $raw);

      // Don't care about this we are just getting rid of it.
      // In v2.3 the size does not count the 4 size bytes themselves.
      if($exHeader['size'] > 0)
      {
        fseek($f, $exHeader['size'], SEEK_CUR);
      }
      $LeftToRead -= 4 + $exHeader['size'];
    }

    // At this point we should be looking at a frame header on the stream

    $result = array();
    while(($LeftToRead > 10) && !feof($f))
    {
      $raw = $this->fread_id3_synchronised_length($f, 10, $IsUnsynchronised, $LeftToRead);
      if(strlen($raw) < 10) break; // Bad frame

      $frame = unpack('a4id/Nsize/C2flags', $raw);
      $size  = $frame['size']; // plain 32 bit integer in v2.3
      if($size == 0) break; // We are now into padding area.

      // v2.3 format flags byte is %ijk00000
      $format = $frame['flags2'];

      if( $format & (1<<7) // Compressed
      ||  $format & (1<<6) // Encrypted
      )
      {
        // Can't work with these
        fseek($f, $size, SEEK_CUR);
        $LeftToRead -= $size;
        continue;
      }

      if($format & (1<<5)) // Grouping
      {
        fread($f, 1); // Get rid of the group byte
        $LeftToRead -= 1;
        $size--;      // it is included in the frame size
      }

      if($size <= 0) continue; // nothing left once the group byte is taken off

      if($size > $this->max_frame_size)
      {
        fseek($f, $size, SEEK_CUR);
        $LeftToRead -= $size;
        continue;
      }

      // Read the value of the frame
      $value = fread($f, $size);
      $LeftToRead -= $size;

      if($IsUnsynchronised)
      {
        $value = str_replace("\xFF\x00", "\xFF", (string) $value);
      }

      $this->store_frame($result, $frame['id'], (string) $value, 'TCON');
    }

    return $result;
  }

  /** Strip the padding from a fixed width ID3v1 field: everything from the first NUL
   *  onwards is dropped, then trailing spaces are removed.
   *
   *  @param  string $raw
   *  @return string
   */

  private function clean_v1_field($raw)
  {
    $nul = strpos($raw, "\0");
    if($nul !== false)
    {
      $raw = (string) substr($raw, 0, $nul);
    }
    return rtrim($raw, ' ');
  }

  /** Read the ID3v1 tag stored in the last 128 bytes of the file.
   *  The handle is positioned by this method, its current position does not matter.
   *  v1 properties are only title, artist, album, year, comment and genre; when the comment
   *  field carries an ID3v1.1 track number, 'track' is returned as well.
   *
   *  @param  resource $f file handle
   *  @return array|false Property => Value with 'id3' => 1, or false (and $this->error set)
   *                      when the file does not end with a v1 tag
   */

  public function read_v1($f)
  {
    $raw = (fseek($f, -128, SEEK_END) === 0) ? fread($f, 128) : false;

    if($raw === false || strlen($raw) < 128 || substr($raw, 0, 3) !== 'TAG')
    {
      $this->error = 'This file does not contain ID3 v1 tag';
      return false;
    }

    // C (unsigned) so that genre bytes above 127 do not turn into negative indexes
    $id3 = unpack("a3signature/a30title/a30artist/a30album/a4year/a30comment/C1genre", $raw);

    foreach(array('title', 'artist', 'album', 'year', 'comment') as $field)
    {
      $id3[$field] = $this->clean_v1_field($id3[$field]);
    }

    // Unknown genre numbers give NULL
    $id3['genre'] = isset($this->genres[$id3['genre']]) ? $this->genres[$id3['genre']] : null;

    // ID3v1.1: comment byte 29 is NUL and byte 30 holds the track number
    if($raw[125] === "\0" && $raw[126] !== "\0")
    {
      $id3['track'] = ord($raw[126]);
    }

    unset($id3['signature']);
    $id3['id3'] = 1;
    return $id3;
  }
}
Note: See TracBrowser for help on using the repository browser.