| #!/usr/bin/perl -s |
| # __________ __ ___. |
| # Open \______ \ ____ ____ | | _\_ |__ _______ ___ |
| # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / |
| # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < |
| # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ |
| # \/ \/ \/ \/ \/ |
| # $Id$ |
| # |
| # Copyright (C) 2006 - 2008 by Daniel Stenberg |
| # |
| |
| # See apps/language.c (TODO: Use common include for both) |
| # Cookie and binary version for the binary lang file |
| my $LANGUAGE_COOKIE = 0x1a; |
| my $LANGUAGE_VERSION = 0x04; |
| |
| # A note for future users and readers: The original v1 language system allowed |
| # the build to create and use a different language than english built-in. We |
| # removed that feature from our build-system, but the build scripts still had |
| # the ability. But, starting now, this ability is no longer provided since I |
| # figured it was boring and unnecessary to write support for now since we |
| # don't use it anymore. |
| |
| my %user2num = |
| ('core' => 1); |
| |
| |
| if(!$ARGV[0]) { |
| print <<MOO |
| Usage: genlang [options] <langv2 file> |
| |
| -p=<prefix> |
| Make the tool create a [prefix].c and [prefix].h file. |
| |
| -b=<outfile> |
| Make the tool create a binary language (.lng) file named [outfile]. |
| The use of this option requires that you also use -e, -t and -i. |
| |
| -u |
| Update language file. Given the translated file and the most recent english |
| file, you\'ll get an updated version sent to stdout. Suitable action to do |
| when you intend to update a translation. |
| |
| -e=<english lang file> |
| Point out the english (original source) file, to use that as master |
| language template. Used in combination with -b, -u or -s. |
| |
| -s |
| Sort the Update language file in the same order as the strings in the |
| English file. |
| |
| -t=<target> |
| Specify which target you want the translations/phrases for. Required when |
| -b or -p is used. |
| |
| The target can in fact be specified as numerous different strings, |
| separated with colons. This will make genlang to use all the specified |
| strings when searching for a matching phrase. |
| |
| -i=<target id> |
| The target id number, needed for -b. |
| |
| -o |
| Voice mode output. Outputs all id: and voice: lines for the given target! |
| |
| -v |
| Enables verbose (debug) output. |
| MOO |
| ; |
| exit; |
| } |
| |
| # How update works: |
| # |
| # 1) scan the english file, keep the whole <phrase> for each phrase. |
| # 2) read the translated file, for each end of phrase, compare: |
| # A) all source strings, if there's any change there should be a comment about |
| # it output |
| # B) the desc fields |
| # |
| # 3) output the phrase with the comments from above |
| # 4) check which phrases that the translated version didn't have, and spit out |
| # the english version of those |
| # |
| |
| my $prefix = $p; |
| my $binary = $b; |
| my $update = $u; |
| my $sortfile = $s; |
| |
| my $english = $e; |
| my $voiceout = $o; |
| |
| my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0) + ($sortfile?1:0); |
| |
| if($check > 1) { |
| print STDERR "Please use only one of -p, -u, -o, -b and -s\n"; |
| exit; |
| } |
| if(!$check) { |
| print STDERR "Please use at least one of -p, -u, -o, -b and -s\n"; |
| exit; |
| } |
| |
| |
| if(($binary || $update || $voiceout || $sortfile) && !$english) { |
| print STDERR "Please use -e too when you use -b, -o, -u or -s\n"; |
| exit; |
| } |
| |
| my $target_id = $i; |
| if($binary && !$target_id) { |
| print STDERR "Please specify a target id number (with -i)!\n"; |
| exit; |
| } |
| |
| my $target = $t; |
| if(!$target && !$update && !$sortfile) { |
| print STDERR "Please specify a target (with -t)!\n"; |
| exit; |
| } |
| my $verbose=$v; |
| |
| my %id; # string to num hash |
| my @idnum; # num to string array |
| |
| my %allphrases; # For sorting - an array of the <phrase> elements |
| my %source; # id string to source phrase hash |
| my %dest; # id string to dest phrase hash |
| my %voice; # id string to voice phrase hash |
| |
| my $input = $ARGV[0]; |
| |
| my @m; |
| my $m="blank"; |
| |
| sub trim { |
| my ($string) = @_; |
| $string =~ s/^\s+//; |
| $string =~ s/\s+$//; |
| return $string; |
| } |
| |
| sub match { |
| my ($string, $pattern)=@_; |
| |
| $pattern =~ s/\*/.*/g; |
| $pattern =~ s/\?/./g; |
| |
| return ($string =~ /^$pattern\z/); |
| } |
| |
| sub blank { |
| # nothing to do |
| } |
| |
| my %head; |
| sub header { |
| my ($full, $n, $v)=@_; |
| $head{$n}=$v; |
| } |
| |
| my %phrase; |
| sub phrase { |
| my ($full, $n, $v)=@_; |
| $phrase{$n}=$v; |
| } |
| |
| sub parsetarget { |
| my ($debug, $strref, $full, $n, $v)=@_; |
| my $string; |
| my @all= split(" *, *", $n); |
| my $test; |
| for $test (@all) { |
| # print "TEST ($debug) $target for $test\n"; |
| for my $part (split(":", $target)) { |
| if(match($part, $test)) { |
| $string = $v; |
| # print "MATCH: $test => $v\n"; |
| $$strref = $string; |
| return $string; |
| } |
| } |
| } |
| } |
| |
| my $src; |
| sub source { |
| parsetarget("src", \$src, @_); |
| } |
| |
| my $dest; |
| sub dest { |
| parsetarget("dest", \$dest, @_); |
| } |
| |
| my $voice; |
| sub voice { |
| parsetarget("voice", \$voice, @_); |
| } |
| |
| my %idmap; |
| my %english; |
| if($english) { |
| # For the cases where the english file needs to be scanned/read, we do |
| # it before we read the translated file. For -b it isn't necessary, but for |
| # -u it is convenient. |
| |
| my $idnum=0; # start with a true number |
| my $vidnum=0x8000; # first voice id |
| open(ENG, "<$english") || die "Error: can't open $english"; |
| my @phrase; |
| my $id; |
| my $maybeid; |
| my $user; |
| my $maybeuser; |
| my $withindest; |
| my $numphrases = 0; |
| while(<ENG>) { |
| |
| # get rid of DOS newlines |
| $_ =~ s/\r//g; |
| |
| if($_ =~ /^ *\<phrase\>/) { |
| # this is the start of a phrase |
| } |
| elsif($_ =~ /^ *\<\/phrase\>/) { |
| |
| # if id is something, when we count and store this phrase |
| if($id) { |
| # voice-only entries get a difference range |
| if($id =~ /^VOICE_/) { |
| # Assign an ID number to this entry |
| $idmap{$id}=$vidnum; |
| $vidnum++; |
| } |
| else { |
| # Assign an ID number to this entry |
| $idmap{$id}=$idnum; |
| $idnum++; |
| # print STDERR "DEST: bumped idnum to $idnum\n"; |
| } |
| |
| # this is the end of a phrase, add it to the english hash |
| $english{$id}=join("", @phrase); |
| } |
| undef @phrase; |
| $id=""; |
| } |
| elsif($_ ne "\n") { |
| # gather everything related to this phrase |
| push @phrase, $_; |
| if($_ =~ /^ *\<dest\>/i) { |
| $withindest=1; |
| $deststr=""; |
| } |
| elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) { |
| $withindest=0; |
| |
| if($update || ($deststr && ($deststr !~ /^none\z/i))) { |
| # we unconditionally always use all IDs when the "update" |
| # feature is used |
| $id = $maybeid; |
| $user = $user2num{$maybeuser}; |
| # print "DEST: use this id $id\n"; |
| } |
| else { |
| # print "skip $maybeid for $name\n"; |
| } |
| } |
| elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) { |
| my ($name, $val)=($1, $2); |
| $dest=""; # in case it is left untouched for when the |
| # model name isn't "our" |
| dest($_, $name, $val); |
| |
| if($dest) { |
| # Store the current dest string. If this target matches |
| # multiple strings, it will get updated several times. |
| $deststr = $dest; |
| } |
| } |
| } |
| |
| if($_ =~ /^ *id: ([^ \t\n]+)/i) { |
| $maybeid=$1; |
| $sortorder{$maybeid}=$numphrases++; |
| } |
| if($_ =~ /^ *user: ([^ \t\n]+)/i) { |
| $maybeuser=$1; |
| } |
| } |
| close(ENG); |
| } |
| |
| # a function that compares the english phrase with the translated one. |
| # compare source strings and desc |
| |
| # Then output the updated version! |
| sub compare { |
| my ($idstr, $engref, $locref)=@_; |
| my ($edesc, $ldesc); |
| my ($esource, $lsource); |
| my $mode=0; |
| |
| for my $l (@$engref) { |
| if($l =~ /^ *#/) { |
| # comment |
| next; |
| } |
| if($l =~ /^ *desc: (.*)/) { |
| $edesc=$1; |
| } |
| elsif($l =~ / *\<source\>/i) { |
| $mode=1; |
| } |
| elsif($mode) { |
| if($l =~ / *\<\/source\>/i) { |
| last; |
| } |
| $esource .= "$l\n"; |
| } |
| } |
| |
| my @show; |
| my @source; |
| |
| $mode = 0; |
| for my $l (@$locref) { |
| if($l =~ /^ *desc: (.*)/) { |
| $ldesc=$1; |
| if(trim($edesc) ne trim($ldesc)) { |
| $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n desc: $edesc\n"; |
| } |
| push @show, $l; |
| } |
| elsif($l =~ / *\<source\>/i) { |
| $mode=1; |
| push @show, $l; |
| } |
| elsif($mode) { |
| if($l =~ / *\<\/source\>/i) { |
| $mode = 0; |
| print @show; |
| if(trim($esource) ne trim($lsource)) { |
| print "### The <source> section differs from the english!\n", |
| "### the previously used one is commented below:\n"; |
| for(split("\n", $lsource)) { |
| print "### $_\n"; |
| } |
| print $esource; |
| } |
| else { |
| print $lsource; |
| } |
| undef @show; # start over |
| |
| push @show, $l; |
| } |
| else { |
| $lsource .= "$l"; |
| } |
| } |
| else { |
| push @show, $l; |
| } |
| } |
| |
| |
| print @show; |
| } |
| |
| my $idcount; # counter for lang ID numbers |
| my $voiceid=0x8000; # counter for voice-only ID numbers |
| |
| # |
| # Now start the scanning of the selected language string |
| # |
| |
| open(LANG, "<$input") || die "Error: couldn't read language file named $input\n"; |
| my @phrase; |
| my $header = 1; |
| while(<LANG>) { |
| |
| $line++; |
| |
| # get rid of DOS newlines |
| $_ =~ s/\r//g; |
| |
| if($_ =~ /^( *\#|[ \t\n\r]*\z)/) { |
| # comment or empty line - output it if it's part of the header |
| if ($header and ($update || $sortfile)) { |
| print($_); |
| } |
| next; |
| } |
| $header = 0; |
| |
| my $ll = $_; |
| |
| # print "M: $m\n"; |
| |
| push @phrase, $ll; |
| |
| # this is an XML-lookalike tag |
| if (/^(<|[^\"<]+<)([^>]*)>/) { |
| my $part = $2; |
| # print "P: $part\n"; |
| |
| if($part =~ /^\//) { |
| # this was a closing tag |
| |
| if($part eq "/phrase") { |
| # closing the phrase |
| |
| my $idstr = $phrase{'id'}; |
| my $idnum; |
| |
| if($binary && !$english{$idstr}) { |
| # $idstr doesn't exist for english, skip it\n"; |
| } |
| elsif($dest =~ /^none\z/i) { |
| # "none" as dest (without quotes) means that this entire |
| # phrase is to be ignored |
| } |
| elsif($sortfile) { |
| $allphrases{$idstr}=join('',@phrase); |
| } |
| elsif(!$update) { |
| # we don't do the fully detailed analysis when we "update" |
| # since we don't do it for a particular target etc |
| |
| # allow the keyword 'deprecated' to be used on dest and |
| # voice strings to mark that as deprecated. It will then |
| # be replaced with "". |
| |
| $dest =~ s/^deprecate(|d)\z/\"\"/i; |
| $voice =~ s/^deprecate(|d)\z/\"\"/i; |
| |
| # basic syntax error alerts, if there are no quotes we |
| # will assume an empty string was intended |
| if($dest !~ /^\"/) { |
| print STDERR "$input:$line:1: warning: dest before line lacks quotes ($dest)!\n"; |
| $dest='""'; |
| } |
| if($src !~ /^\"/) { |
| print STDERR "$input:$line:1: warning: source before line lacks quotes ($src)!\n"; |
| $src='""'; |
| } |
| if($voice !~ /^\"/ and $voice !~ /^none\z/i) { |
| print STDERR "$input:$line:1: warning: voice before line lacks quotes ($voice)!\n"; |
| $voice='""'; |
| } |
| if($dest eq '""' && $phrase{'desc'} !~ /deprecated/i && $idstr !~ /^VOICE/) { |
| print STDERR "$input:$line:1: warning: empty dest before line in non-deprecated phrase!\n"; |
| } |
| |
| my $user = $user2num{trim($phrase{'user'})}; |
| if(!$user) { |
| print STDERR "$input:$line:1: warning: unknown user!\n"; |
| $user = 1; |
| } |
| |
| # Use the ID name to figure out which id number range we |
| # should use for this phrase. Voice-only strings are |
| # separated. |
| |
| if($idstr =~ /^VOICE/) { |
| $idnum = $voiceid++; |
| } |
| else { |
| $idnum = $idcount++; |
| } |
| |
| $id{$idstr} = $idnum; |
| $idnum[$idnum]=$idstr; |
| |
| $source{$idstr}=$src; |
| $dest{$idstr}=$dest; |
| $voice{$idstr}=$voice; |
| |
| if($verbose) { |
| print "id: $phrase{id} ($idnum)\n"; |
| print "source: $src\n"; |
| print "dest: $dest\n"; |
| print "voice: $voice\n"; |
| print "user: $user\n"; |
| } |
| |
| undef $src; |
| undef $dest; |
| undef $voice; |
| undef $user; |
| undef %phrase; |
| } |
| |
| if($update) { |
| my $e = $english{$idstr}; |
| |
| if($e) { |
| # compare original english with this! |
| my @eng = split("\n", $english{$idstr}); |
| |
| compare($idstr, \@eng, \@phrase); |
| |
| $english{$idstr}=""; # clear it |
| } |
| else { |
| print "### $idstr: The phrase is not used. Skipped\n"; |
| } |
| } |
| undef @phrase; |
| |
| } # end of </phrase> |
| |
| # starts with a slash, this _ends_ this section |
| $m = pop @m; # get back old value, the previous level's tag |
| next; |
| } # end of tag close |
| |
| # This is an opening (sub) tag |
| |
| push @m, $m; # store old value |
| $m = $part; |
| next; |
| } |
| |
| if(/^ *([^:]+): *(.*)/) { |
| my ($name, $val)=($1, $2); |
| &$m($_, $name, $val); |
| } |
| } |
| close(LANG); |
| |
| if($update) { |
| my $any=0; |
| for(keys %english) { |
| if($english{$_}) { |
| print "###\n", |
| "### This phrase below was not present in the translated file\n", |
| "<phrase>\n"; |
| print $english{$_}; |
| print "</phrase>\n"; |
| } |
| } |
| } |
| |
| if ($sortfile) { |
| for(sort { $sortorder{$a} <=> $sortorder{$b} } keys %allphrases) { |
| print $allphrases{$_}; |
| } |
| } |
| |
| if($prefix) { |
| # We create a .c and .h file |
| |
| open(HFILE, ">$prefix/lang.h") || |
| die "Error: couldn't create file $prefix/lang.h\n"; |
| open(CFILE, ">$prefix/lang_core.c") || |
| die "Error: couldn't create file $prefix/lang_core.c\n"; |
| |
| # get header file name |
| $headername = "$prefix/lang.h"; |
| $headername =~ s/(.*\/)*//; |
| |
| print HFILE <<MOO |
| /* This file was automatically generated using genlang */ |
| /* |
| * The str() macro/functions is how to access strings that might be |
| * translated. Use it like str(MACRO) and expect a string to be |
| * returned! |
| */ |
| #define str(x) language_strings[x] |
| |
| /* this is the array for holding the string pointers. |
| It will be initialized at runtime. */ |
| extern unsigned char *language_strings[]; |
| /* this contains the concatenation of all strings, separated by \\0 chars */ |
| extern const unsigned char language_builtin[]; |
| |
| /* The enum below contains all available strings */ |
| enum \{ |
| MOO |
| ; |
| |
| print CFILE <<MOO |
| /* This file was automaticly generated using genlang, the strings come |
| from "$input" */ |
| |
| #include "$headername" |
| |
| unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY]; |
| const unsigned char language_builtin[] = |
| MOO |
| ; |
| |
| # Output the ID names for the enum in the header file |
| my $i; |
| for $i (1 .. $idcount) { |
| my $name=$idnum[$i - 1]; # get the ID name |
| |
| $name =~ s/\"//g; # cut off the quotes |
| |
| printf HFILE (" %s, /* %d */\n", $name, $i-1); |
| } |
| |
| # Output separation marker for last string ID and the upcoming voice IDs |
| |
| print HFILE <<MOO |
| LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */ |
| /* --- below this follows voice-only strings --- */ |
| VOICEONLY_DELIMITER = 0x8000, |
| MOO |
| ; |
| |
| # Output the ID names for the enum in the header file |
| for $i (0x8000 .. ($voiceid-1)) { |
| my $name=$idnum[$i]; # get the ID name |
| |
| $name =~ s/\"//g; # cut off the quotes |
| |
| printf HFILE (" %s,\n", $name); |
| } |
| |
| # Output end of enum |
| print HFILE "\n};\n/* end of generated enum list */\n"; |
| |
| # Output the target phrases for the source file |
| for $i (1 .. $idcount) { |
| my $name=$idnum[$i - 1]; # get the ID |
| my $dest = $dest{$name}; # get the destination phrase |
| |
| $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote |
| |
| if(!$dest) { |
| # this is just to be on the safe side |
| $dest = '"\0"'; |
| } |
| |
| printf CFILE (" %s\n", $dest); |
| } |
| |
| # Output end of string chunk |
| print CFILE <<MOO |
| ; |
| /* end of generated string list */ |
| MOO |
| ; |
| |
| close(HFILE); |
| close(CFILE); |
| } # end of the c/h file generation |
| elsif($binary) { |
| # Creation of a binary lang file was requested |
| |
| # We must first scan the english file to get the correct order of the id |
| # numbers used there, as that is what sets the id order for all language |
| # files. The english file is scanned before the translated file was |
| # scanned. |
| |
| open(OUTF, ">$binary") or die "Error: Can't create $binary"; |
| binmode OUTF; |
| printf OUTF ("%c%c%c", $LANGUAGE_COOKIE, $LANGUAGE_VERSION, $target_id); # magic lang file header |
| |
| # loop over the target phrases |
| for $i (1 .. $idcount) { |
| my $name=$idnum[$i - 1]; # get the ID |
| my $dest = $dest{$name}; # get the destination phrase |
| |
| if($dest) { |
| $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes |
| |
| # Now, make sure we get the number from the english sort order: |
| $idnum = $idmap{$name}; |
| |
| printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest); |
| } |
| } |
| } |
| elsif($voiceout) { |
| # voice output requested, display id: and voice: strings in a v1-like |
| # fashion |
| |
| my @engl; |
| |
| # This loops over the strings in the translated language file order |
| my @ids = ((0 .. ($idcount-1))); |
| push @ids, (0x8000 .. ($voiceid-1)); |
| |
| #for my $id (@ids) { |
| # print "$id\n"; |
| #} |
| |
| for $i (@ids) { |
| my $name=$idnum[$i]; # get the ID |
| my $dest = $voice{$name}; # get the destination voice string |
| |
| if($dest) { |
| $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes |
| |
| # Now, make sure we get the number from the english sort order: |
| $idnum = $idmap{$name}; |
| |
| if(length($idnum)) { |
| $engl[$idnum] = $i; |
| |
| #print "Input index $i output index $idnum\n"; |
| } |
| else { |
| # not used, mark it so |
| $engl[$i] = -1 |
| } |
| |
| } |
| } |
| for my $i (@ids) { |
| |
| my $o = $engl[$i]; |
| |
| if(($o < 0) || !length($o)) { |
| print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n"; |
| next; |
| } |
| |
| my $name=$idnum[$o]; # get the ID |
| my $dest = $voice{$name}; # get the destination voice string |
| |
| print "#$i ($o)\nid: $name\nvoice: $dest\n"; |
| } |
| |
| } |
| |
| |
| if($verbose) { |
| printf("%d ID strings scanned\n", $idcount); |
| |
| print "* head *\n"; |
| for(keys %head) { |
| printf "$_: %s\n", $head{$_}; |
| } |
| } |
| |