source: trunk/contrib/lc_create_merged_file.php @ 1404

Last change on this file since 1404 was 1404, checked in by gogo, 20 months ago

Setup to sort the language data files properly by key, I meant to do that before committing the big patch.

File size: 18.3 KB
Line 
1<?php
2
3  /** Generate a master translation file for a given language.
4   *
5   *  The master file makes it easier for people to do a translation job,
6   *   it combines the text to be translated from all language files for
7   *   a given language into a single file.  Duplicates are so marked
8   *   so that they do not need to be translated multiple times.
9   * 
10   *  New translations are put in a section allowing that.
11   *
12   *  Pass a completed master translation file to
13   *    "lc_split_merged_file.php"
14   *  in order to break it apart and update the individual languages.
15   *
16   * The resulting file is a json file of this structure...
17   
18 {
19    'Xinha': {
20      'Hello World': 'Bonjour Tout Le Monde',
21      'Good Bye':    'Au revoir',
22    },
23   
24    'TableOperations': {
25      'Insert Table': 'Insérer un tableau'
26    },
27   
28    __NEW_TRANSLATIONS__: {
29   
30      'Xinha': {
31        'Hi': '',                          // This translation is new, translate as appropriate.
32      },
33     
34      'TableOperations': {
35
36        'Good Bye': 'Au revoir',           // This translation is new for this context (TableOperations) but
37                                           //  was found in some other context (possibly as an obsolete translation)
38                                           //  check that it is correct and appropriate in this context.
39     
40        'Hi': '<<Xinha>>'                  // This is a new translation in a prior context pending translation
41                                           //  it will use that new translation, or if not appropriate the
42                                           //  translator can replace the link (<<Xinha>>) with a specific
43                                           //  translation.
44      }
45   
46    }
47 }
48   
49*/
50
51// Yes I know that this code is all a horrible mess of copy-paste-coding, it
52// was hacked together as I went along deciding what worked and what didn't
53// it's not worth refactoring to clean it up.
54
// Command line: the target language code is required, the output file is
// optional.  isset() is used instead of reading $argv[1] directly so that
// running the script with no arguments shows the usage text without first
// raising an "undefined offset" warning.
$TargetLanguage = isset($argv[1]) ? $argv[1] : null;
$OutputFile     = isset($argv[2]) ? $argv[2] : null;

// "-" explicitly selects stdout, which is also the default
if($OutputFile === '-') $OutputFile = null;

if(!$TargetLanguage || $TargetLanguage == '-h' || $TargetLanguage == '--help' )
{
  echo "
   
Usage: {$argv[0]} {ln} [of]
   ln: language code, eg fr
   of: file to put the generated json into, eg lang/merged/fr.js
                   defaults to stdout
   
   You must have created the language base files using lc_parse_strings.php
   
   PROTIP:  On a half decent unix type box you should be able to do this from
      the Xinha root directory to create all of the lang/merged/*.js files
     
   if ! [ -f XinhaCore.js ] ; then cd ../; fi
   php contrib/lc_parse_strings.php
   for lang in $(find . -wholename \"*/lang/*.js\" | sed -r 's/.*\///' | sort | uniq | grep -v base | sed -r 's/.js//')
   do
     php contrib/".basename($argv[0]) . " \$lang lang/merged/\$lang.js
   done
   php contrib/".basename($argv[0]) . " NEW lang/merged/__new__.js
   
";
  exit(1);
}

// The Xinha root is the parent of the directory holding this script
$XinhaRoot = realpath(dirname(__FILE__).'/..');

// Every directory that holds language files; starts with the core lang
// directory, module and plugin lang directories are appended later.
$languageDirs = array(
  $XinhaRoot.'/lang',
);
90 
/**
 * Load a Xinha language file and decode it into an associative array.
 *
 * Language files are "relaxed" JSON: they may start with a UTF-8 byte order
 * mark, contain full-line // comments and block comments, use single quoted
 * keys and values, and end with a semicolon.  The contents are normalised to
 * strict JSON before being decoded.
 *
 * @param string $file Path of the language file.
 * @return array Decoded translations (English => translation); an empty array
 *               when the file does not exist or holds an empty object.
 * @throws Exception When the file cannot be read or does not decode to an
 *                   array/object.
 */
function load_lang_file($file)
{
  if(!file_exists($file)) return array();

  $contents = file_get_contents($file);
  if($contents === false)
  {
    throw new Exception("Unable to read {$file}.");
  }

  // A byte order mark in front of the opening brace (rather than in front of
  // a comment line) is not removed by the comment stripping below and would
  // make json_decode() fail, so drop it up front.
  if(strncmp($contents, "\xEF\xBB\xBF", 3) === 0)
  {
    $contents = (string) substr($contents, 3);
  }

  // Strip full-line // comments, then block comments
  $contents = preg_replace('/^(\xEF\xBB\xBF)?\s*\/\/.*$/m', '', trim($contents));
  $contents = preg_replace('/\/\*.*?\*\//s', '', $contents);

  // JSON only permits double quotes.  Re-quote a single quoted string:
  // group 1 is whatever precedes the string, group 2 the string body and
  // group 4 whatever follows it (moved outside the new quotes).
  $requote = function($m){
    $body = preg_replace('/\\\\\'/', '\'', $m[2]);  // \' no longer needs escaping
    $body = preg_replace('/"/', '\\"', $body);       // but " now does
    return $m[1].'"'.$body.'"'.$m[4];
  };

  // Single quoted keys first, then single quoted values of double quoted keys
  $contents = preg_replace_callback('/^(\s*)\'(([^\']|\\\\\')+?)\'(\s*:)/m', $requote, $contents);
  $contents = preg_replace_callback('/^(\s*"(?:[^"]|\\\\")+?"\s*:\s*)\'(([^\']|\\\\\')+)(\s*)\'/m', $requote, $contents);

  // Any remaining escaped single quote is not a valid JSON escape
  $contents = preg_replace('/\\\\\'/', "'", $contents);

  // Language files are JavaScript statements, remove the trailing semicolon
  $contents = trim(trim(trim($contents),';'));
  $decode   = json_decode($contents, true);

  // Test the type, not truthiness: an empty object decodes to an empty array
  // which is falsy but perfectly valid.
  if(!is_array($decode))
  {
    echo $contents;
    throw new Exception("Decode of {$file} failed. " . json_last_error() . ' (' . json_last_error_msg() . ')');
  }
  return $decode;
}
125
// Add the lang directory of every module and plugin that has a generated
// lc_base.js to $languageDirs.
foreach(array('modules', 'plugins', 'unsupported_plugins') as $dir)
{
  $containerDir = $XinhaRoot.'/'.$dir;

  // Skip (with a warning) a container that is missing or unreadable instead
  // of passing a failed handle on to readdir().
  $dh = is_dir($containerDir) ? opendir($containerDir) : false;
  if($dh === false)
  {
    fwrite(STDERR, "Warning: Unable to read directory ".$containerDir.", skipping\n");
    continue;
  }

  // Compare with false explicitly: an entry named "0" is falsy and would
  // otherwise end the scan early.
  while(($f = readdir($dh)) !== false)
  {
    // Ignore ".", ".." and hidden entries
    if($f[0] == '.') continue;

    $langDir = $containerDir.'/'.$f.'/lang';
    if(!is_dir($langDir)) continue;

    if(!file_exists($langDir.'/lc_base.js'))
    {
      fwrite(STDERR, "Warning: No base language file found for " .$langDir.'/lc_base.js'.", ensure that you run lc_parse_strings.php to generate lc_base.js\n");
      continue;
    }

    $languageDirs[] = $langDir;
  }

  closedir($dh);
}
143
144  // Sort the language directories by module/plugin name, except unsupported which are last (and then alpha)
145  // and the core which is first
/**
 * Classify a language directory path by the kind of component it belongs to.
 *
 * The path is searched for marker words anywhere within it; the first marker
 * found (in the order listed) decides the type.
 *
 * @param string $a Language directory path.
 * @return string 'MODULE', 'UNSUPPORTED', 'PLUGIN', or 'CORE' when no marker
 *                is present.
 */
function what_type($a)
{
  // Order matters: "unsupported_plugins" also contains "plugins", so the
  // unsupported marker has to be tried before the plugin one.
  $markers = array(
    'MODULE'      => '/modules/',
    'UNSUPPORTED' => '/unsupported/',
    'PLUGIN'      => '/plugins/',
  );

  foreach($markers as $type => $pattern)
  {
    if(preg_match($pattern, $a)) return $type;
  }

  return 'CORE';
}
153 
/**
 * Name of the component a language directory belongs to.
 *
 * @param string $a Language directory path.
 * @return string 'Xinha' for the core, otherwise the name of the directory
 *                that contains the lang directory.
 */
function what_name($a)
{
  if(what_type($a) == 'CORE')
  {
    return 'Xinha';
  }

  return basename(dirname($a));
}
162 
/**
 * usort() comparator for language directories.
 *
 * The core sorts first and unsupported plugins sort last; everything else
 * (including two directories of the same type) is ordered by component name.
 *
 * @param string $a Language directory path.
 * @param string $b Language directory path.
 * @return int Negative, zero or positive as $a sorts before, with or after $b.
 */
function sort_lang_dir($a, $b)
{
  $typeA = what_type($a);
  $typeB = what_type($b);

  if($typeA !== $typeB)
  {
    // Position taken by $a when it is of the given type; $b gets the opposite
    $pinned = array('CORE' => -1, 'UNSUPPORTED' => 1);
    foreach($pinned as $type => $positionOfA)
    {
      if($typeA == $type) return $positionOfA;
      if($typeB == $type) return -$positionOfA;
    }
  }

  return strcmp(what_name($a), what_name($b));
}
usort($languageDirs, 'sort_lang_dir');

// The translation recorded the first time each English string is encountered
$firstReference = array();

// The merged result: one section per module plus __NEW_TRANSLATIONS__
$outputData     = array();

// Key under which an informational note for the translator is stored
$Nt = '__ TRANSLATOR NOTE __';

// Create every section up front so that the ordering can be fixed before any
// translations are added.
$typeOfContainer = array(
  'modules'             => 'MODULE',
  'plugins'             => 'PLUGIN',
  'unsupported_plugins' => 'UNSUPPORTED',
);

foreach($languageDirs as $langX => $dir)
{
  $moduleName = '';

  if($langX == 0)
  {
    // The first directory is treated as the core (sort_lang_dir orders the
    // core ahead of everything else).
    $moduleName = 'Xinha';
    $moduleType = 'CORE';
  }
  elseif(preg_match('/\/(modules|unsupported_plugins|plugins)\/([^\/]+)/', $dir, $M))
  {
    $moduleName = $M[2];
    $moduleType = $typeOfContainer[$M[1]];
  }
  else
  {
    // This is a failure, so report it with a non-zero exit status
    fprintf(STDERR, "Unable to figure out a module name for {$dir} [".__LINE__."]\n");
    exit(1);
  }

  $outputData['__NEW_TRANSLATIONS__'][$moduleName] = array();
  $outputData[$moduleName] = array();

  if($moduleType == 'UNSUPPORTED')
  {
    $outputData['__NEW_TRANSLATIONS__'][$moduleName][$Nt] = "*** ".strtoupper($moduleName)." IS UNSUPPORTED (TRANSLATE AT YOUR DISCRETION) ***";
  }
}

// Sections are presented in key order
ksort($outputData);
ksort($outputData['__NEW_TRANSLATIONS__']);

// Pull the unsupported ones out of new translations and put them last;
// removing and re-adding a key moves it to the end of the array.
foreach(array_keys($outputData['__NEW_TRANSLATIONS__']) as $k)
{
  if(@$outputData['__NEW_TRANSLATIONS__'][$k][$Nt])
  {
    $v = $outputData['__NEW_TRANSLATIONS__'][$k];
    unset($outputData['__NEW_TRANSLATIONS__'][$k]);
    $outputData['__NEW_TRANSLATIONS__'][$k] = $v;
  }
}
238 
239 
// First do the existing translations: every base string that already has a
// non-empty translation in the target language goes into its module's section
// and is remembered as the first reference for that string.
foreach($languageDirs as $langX => $dir)
{
  $baseFile   = $dir.'/lc_base.js';
  $baseSource = trim(preg_replace('/^\s*\/\/.*$/m', '', file_get_contents($baseFile)));
  $baseLang   = json_decode($baseSource, TRUE);

  // Remember the status now, load_lang_file() decodes JSON as well and would
  // overwrite what json_last_error() reports.
  $baseError  = json_last_error();

  $targetLang = load_lang_file(($dir.'/'.$TargetLanguage.'.js'));
  $moduleName = '';

  if($langX == 0)
  {
    $moduleName = 'Xinha';
  }
  elseif(preg_match('/\/(modules|unsupported_plugins|plugins)\/([^\/]+)/', $dir, $M))
  {
    $moduleName = $M[2];
  }
  else
  {
    fprintf(STDERR, "Unable to figure out a module name for {$dir} [".__LINE__."]\n");
    exit(1);
  }

  if(!is_array($baseLang))
  {
    // Diagnostics go to stderr (stdout may be carrying the merged file) and
    // the failure is signalled with a non-zero exit status.
    fwrite(STDERR, "\n\nUnable to decode ".$baseFile." (json error ".$baseError.")\n".$baseSource."\n");
    exit(1);
  }

  foreach($baseLang as $English => $Nothing)
  {
    // Only strings with a non-empty translation count as translated
    if(!(isset($targetLang[$English]) && strlen($targetLang[$English]))) continue;

    if(!isset($outputData[$moduleName]))
    {
      $outputData[$moduleName] = array();
    }

    if(!isset($firstReference[$English])) $firstReference[$English] = $targetLang[$English];
    $outputData[$moduleName][$English] = $targetLang[$English];
  }
}
307 
// Record a first reference for every string present in the target language
// files that does not have one yet, including obsolete translations.
foreach($languageDirs as $dir)
{
  $targetLang = load_lang_file(($dir.'/'.$TargetLanguage.'.js'));

  // The target language files may have a section for obsolete translations
  // called __OBSOLETE__ - this is added by the lc_split_merged_file.php
  // script.  Fold it into the strings to examine.
  if(isset($targetLang['__OBSOLETE__']))
  {
    $obsolete = $targetLang['__OBSOLETE__'];
    unset($targetLang['__OBSOLETE__']);

    // Array union rather than array_merge(): it leaves numeric looking keys
    // (an English string such as "10") intact and lets a current translation
    // take priority over an obsolete one for the same string.
    if(is_array($obsolete))
    {
      $targetLang = $targetLang + $obsolete;
    }
  }

  foreach($targetLang as $English => $Local)
  {
    if(!isset($firstReference[$English]))
    {
      $firstReference[$English] = $Local;
    }
  }
}
364 
// Then do the new ones: every base string without a (non-empty) translation
// in its own module goes into __NEW_TRANSLATIONS__, pre-filled with the best
// existing translation that can be found for it.
foreach($languageDirs as $langX => $dir)
{
  // Progress goes to stderr, stdout may be carrying the merged file
  fwrite(STDERR, "Process ".$dir.'/lc_base.js'."\n");

  $baseLang   = json_decode(trim(preg_replace('/^\s*\/\/.*$/m', '', file_get_contents($dir.'/lc_base.js'))), true);
  $targetLang = load_lang_file(($dir.'/'.$TargetLanguage.'.js'));
  $moduleName = '';

  if($langX == 0)
  {
    $moduleName = 'Xinha';
  }
  elseif(preg_match('/\/(modules|unsupported_plugins|plugins)\/([^\/]+)/', $dir, $M))
  {
    $moduleName = $M[2];
  }
  else
  {
    fprintf(STDERR, "Unable to figure out a module name for {$dir} [".__LINE__."]\n");
    exit(1);
  }

  foreach($baseLang as $English => $Nothing)
  {
    // Exact complement of the test used for existing translations, so that a
    // string with an empty translation is offered for translation instead of
    // being left out of the merged file altogether.
    if(isset($targetLang[$English]) && strlen($targetLang[$English])) continue;

    // Keys under which a usable translation may already be known, in order
    // of preference: the string itself, its lowercase form, and the string
    // without (or with) a trailing colon.
    $candidates = array($English, strtolower($English));
    if(preg_match('/:\s*$/', $English))
    {
      $candidates[] = preg_replace('/:\s*$/', '', ($English));
    }
    else
    {
      $candidates[] = $English.':';
    }

    $Ref = null;
    foreach($candidates as $candidate)
    {
      if(isset($firstReference[$candidate]))
      {
        $Ref = $firstReference[$candidate];
        break;
      }
    }

    if($Ref !== null)
    {
      $outputData['__NEW_TRANSLATIONS__'][$moduleName][$English] = "{$Ref}";
    }
    else
    {
      // Nothing appropriate was found: leave it for the translator, and have
      // later modules with the same string link to this module's section.
      $firstReference[$English] = '<<'.$moduleName.'>>';
      $outputData['__NEW_TRANSLATIONS__'][$moduleName][$English] = '';
    }
  }
}
443 
/**
 * Order one section of new translations for the translator.
 *
 * Entries are arranged as: __xxx__ informational keys, then untranslated
 * (empty) strings, then strings with a suggested translation, and finally
 * <<Section>> links.  Within each group entries are ordered by key, ignoring
 * case and surrounding whitespace.
 *
 * @param array $array English string => translation.
 * @return array The same entries in presentation order.
 */
function sort_lang($array)
{
  // Sort (key, value) pairs so that the comparator can see both
  $pairs = array();
  foreach(array_keys($array) as $key)
  {
    $pairs[] = array($key, $array[$key]);
  }

  usort($pairs, function($left, $right) {
    // __xxx__ informational keys go to the start
    $leftInfo  = (bool) preg_match('/^__.*__$/', $left[0]);
    $rightInfo = (bool) preg_match('/^__.*__$/', $right[0]);
    if($leftInfo !== $rightInfo) return $leftInfo ? -1 : 1;

    // Empty translations next
    $leftEmpty  = ($left[1] == '');
    $rightEmpty = ($right[1] == '');
    if($leftEmpty !== $rightEmpty) return $leftEmpty ? -1 : 1;

    // Links to another section go after real translations
    $leftLink  = (bool) preg_match('/^<</', $left[1]);
    $rightLink = (bool) preg_match('/^<</', $right[1]);
    if($leftLink !== $rightLink) return $leftLink ? 1 : -1;

    // Otherwise alphabetical
    return strcmp(strtolower(trim($left[0])), strtolower(trim($right[0])));
  });

  $sorted = array();
  foreach($pairs as $pair)
  {
    $sorted[$pair[0]] = $pair[1];
  }

  return $sorted;
}
478 
// Put each section of new translations into presentation order, dropping the
// sections that have nothing to translate: those that are empty, and those of
// unsupported plugins that contain only the translator note.
foreach(array_keys($outputData['__NEW_TRANSLATIONS__']) as $moduleName)
{
  $section  = sort_lang($outputData['__NEW_TRANSLATIONS__'][$moduleName]);
  $entries  = count($section);
  $noteOnly = ($entries == 1 && isset($section[$Nt]));

  if(!$entries || $noteOnly)
  {
    unset($outputData['__NEW_TRANSLATIONS__'][$moduleName]);
  }
  else
  {
    $outputData['__NEW_TRANSLATIONS__'][$moduleName] = $section;
  }
}

// Finally remove every top level section that ended up empty
$outputData = array_filter($outputData, 'count');
506 
507 
508 
// Encode the merged data.  json_encode() returns false on failure (for
// example on invalid UTF-8 in a translation); stop rather than emit a file
// that contains only the header.
$json = json_encode($outputData, JSON_PRETTY_PRINT|JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE);
if($json === false)
{
  fwrite(STDERR, "Unable to encode the merged translations: ".json_last_error_msg()."\n");
  exit(1);
}
$outputData = $json;

// Header comment placed in front of the JSON, with instructions for the
// translator.
$data = "// Xinha Language Combined Translation File\n";
$data .= "//\n";
$data .= "// LANG: \"$TargetLanguage\", ENCODING: UTF-8\n";
$data .= "//\n";
$data .= "// INSTRUCTIONS TO TRANSLATORS
// ===========================================================================
//
// Your translation must be in UTF-8 Character Encoding.
//
// This is a JSON encoded file (plus comments), strings should be double-quote
// only, do not use single quotes to surround strings - \"hello\", not 'hello'
// do not have a trailing comma after the last entry in a section.
//
// Only full line comments are allowed (that a comments occupy entire lines).
//
// Search for the __NEW_TRANSLATIONS__ section below, this is where you will
// want to focus, this section includes things that do not presently have a
// translation or for which the translation needs to be checked for accuracy.
//
// In the New Translations section a translation string is one of the following
//
//  \"English String Here\" : \"\"
//     This means it is not translated yet, add your translation...
//     \"English String Here\" : \"Klingon String Here\"
//
//  \"English String Here\" : \"Translated String Here\"
//     This means that an existing translation for this string, in some other
//     section has been found, and used.  Check that it is approprite for this
//     section and if it is, that's fine leave it as is, otherwise change as
//     appropriate.
//
//  \"English String Here\" : \"<<AnotherSection>>\"
//     This means use the same translation for this string as <<AnotherSection>>
//     this saves you re-tranlating strings.  If the Context of this section
//     and the context of AnotherSection seem the same, that's fine leave it
//     using that translation, but if this section needs a different translation,
//     you can provide it by replacing the link (<<AnotherSection>>) with that
//     new translation.  For example - a \"Table\" in say \"DataPlugin\" is
//     perhaps translated differently to \"Table\" in \"FurniturePlugin\".
//
// TESTING YOUR TRANSLATION
// ===========================================================================
// Simply place your translation file on your webserver somewhere for example
//
//   /xinha/lang/merged/{$TargetLanguage}.js
//
// and then tell Xinha where to get it (before loading XinhaCore.js) by
//
//  _editor_lang              = '{$TargetLanguage}';
//  _editor_lang_merged_file  = '/xinha/lang/merged/{$TargetLanguage}.js';
//
// Xinha will load your new language definition.
//
// SUBMITTING YOUR TRANSLATION
// ===========================================================================
// Simply create a Ticket on the Xinha website and attach your translation
// file.
//
// The Xinha developers will take your file and use the
//     contrib/lc_split_merged_file.php
// script to load it into the Xinha distribution.
";
$data .= "\n";

// No output file means stdout
if(!$OutputFile)
{
  echo $data . $outputData;
}
elseif(file_put_contents($OutputFile, $data.$outputData) === false)
{
  fwrite(STDERR, "Unable to write ".$OutputFile."\n");
  exit(1);
}
583?>
Note: See TracBrowser for help on using the repository browser.