Jonas Häggqvist | 45f3dfb | 2011-04-18 21:32:14 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python2.6 |
| 2 | import re |
Frank Gevaerts | f6a9d68 | 2012-04-04 21:26:38 +0200 | [diff] [blame] | 3 | import sys |
| 4 | import locale |
Jonas Häggqvist | 45f3dfb | 2011-04-18 21:32:14 +0000 | [diff] [blame] | 5 | import codecs |
| 6 | from glob import glob |
Frank Gevaerts | f6a9d68 | 2012-04-04 21:26:38 +0200 | [diff] [blame] | 7 | from os.path import basename, dirname, join |
Jonas Häggqvist | 45f3dfb | 2011-04-18 21:32:14 +0000 | [diff] [blame] | 8 | from pprint import pprint |
| 9 | |
def langs():
    """Return the paths of all language files shipped with the sources.

    The search pattern ``rockbox/apps/lang/*.lang`` is resolved relative to
    the directory that contains this script.
    """
    script_dir = dirname(__file__)
    pattern = join(script_dir, 'rockbox/apps/lang/*.lang')
    return glob(pattern)
Jonas Häggqvist | 45f3dfb | 2011-04-18 21:32:14 +0000 | [diff] [blame] | 12 | |
def fonts():
    """Return the paths of all font files shipped with the sources.

    The search pattern ``rockbox/fonts/*.bdf`` is resolved relative to the
    directory that contains this script.
    """
    script_dir = dirname(__file__)
    pattern = join(script_dir, 'rockbox/fonts/*.bdf')
    return glob(pattern)
Jonas Häggqvist | 45f3dfb | 2011-04-18 21:32:14 +0000 | [diff] [blame] | 15 | |
def charusage(langfile):
    """Count the characters used in the ``<dest>`` strings of a language file.

    Only lines between a ``<dest>`` line and the following ``</dest>`` line
    are inspected, and of those only lines shaped like ``key: "text"``.
    Every character of ``text`` is tallied.

    langfile -- path of the language file; it is decoded as UTF-8.

    Returns a dict mapping each character to the number of times it occurs.
    """
    # Compile once instead of re-resolving the patterns for every line.
    dest_open = re.compile(r'^\s*<dest>\s*$')
    dest_close = re.compile(r'^\s*</dest>\s*$')
    quoted = re.compile(r'\s*\S*\s*:\s*"([^"]*)"\s*')

    usage = {}
    indest = False
    # The context manager closes the file even if decoding fails part-way.
    with codecs.open(langfile, 'r', 'UTF-8') as fp:
        for line in fp:
            if dest_open.match(line):
                indest = True
            elif dest_close.match(line):
                indest = False

            if not indest:
                continue

            string = quoted.match(line)
            if string:
                for char in string.group(1):
                    usage[char] = usage.get(char, 0) + 1
    return usage
| 34 | |
def charsavailable(fontfile):
    """Return the characters a font file declares via ``ENCODING`` lines.

    Each line starting with ``ENCODING <decimal number>`` contributes the
    character with that code point. Lines whose value is not a plain run of
    digits (for example ``ENCODING -1``) do not match and are skipped.

    fontfile -- path of the font file (the script passes ``*.bdf`` files).

    Returns a list of one-character strings in file order.
    """
    try:
        to_char = unichr  # Python 2: build a unicode character
    except NameError:
        to_char = chr  # no unichr available: chr() already yields text

    encoding_line = re.compile(r'ENCODING\s+(\d+)\s*')
    chars = []
    # The context manager guarantees the font file is closed after reading.
    with open(fontfile, 'r') as fp:
        for line in fp:
            encoding = encoding_line.match(line)
            if encoding:
                chars.append(to_char(int(encoding.group(1))))
    return chars
| 43 | |
def calculatecoverage(charsused, charsavailable):
    """Return the fraction of character uses that a font can display.

    charsused      -- dict mapping character -> number of uses.
    charsavailable -- iterable of the characters the font provides.

    The space character is ignored entirely. The result is
    ``covered uses / total uses`` as a float between 0.0 and 1.0. When there
    are no non-space uses at all, 0.0 is returned instead of dividing by
    zero.
    """
    # Build the lookup set once; testing membership in a list would rescan
    # the whole font for every character.
    available = set(charsavailable)

    total = 0
    covered = 0
    for char, uses in charsused.items():
        if char == u' ':
            continue
        total += uses
        if char in available:
            covered += uses

    if total == 0:
        # Nothing to measure (empty language or only spaces).
        return 0.0
    return float(covered) / float(total)
| 54 | |
def generate_summary(fontstats, langusage):
    """Print the coverage of every font for every language.

    fontstats -- dict mapping font file path -> characters it provides.
    langusage -- dict mapping language file path -> character usage dict.

    For each language (sorted by path) a ``[name]`` header is printed,
    followed by one ``font = coverage`` line per font (sorted by path).
    """
    for langfile in sorted(langusage):
        print("[%s]" % basename(langfile).replace('.lang', ''))
        used = langusage[langfile]
        for fontfile in sorted(fontstats):
            ratio = calculatecoverage(used, fontstats[fontfile])
            fontname = basename(fontfile).replace('.bdf', '')
            print(" %s = %f" % (fontname, ratio))
Jonas Häggqvist | 45f3dfb | 2011-04-18 21:32:14 +0000 | [diff] [blame] | 61 | |
def generate_missing(fontstats, langusage, maxmissing=25):
    """Print, per language, the characters each font is missing.

    fontstats  -- dict mapping font file path -> characters it provides.
    langusage  -- dict mapping language file path -> character usage dict.
    maxmissing -- a font is listed only when it misses at least one and
                  fewer than this many characters (default 25).

    For each language (sorted by path) a ``[name]`` header is printed. Each
    listed font (sorted by path) gets one line naming every missing
    character with its code point, in code point order so the output is
    stable between runs.
    """
    # Prepare each font's display name and lookup set once; testing
    # membership in a list would rescan the font for every character.
    prepared = [(basename(fontfile).replace('.bdf', ''), set(fontstats[fontfile]))
                for fontfile in sorted(fontstats)]

    for langfile in sorted(langusage):
        print("[%s]" % basename(langfile).replace('.lang', ''))
        charsused = langusage[langfile]
        for fontname, available in prepared:
            missingchars = sorted(char for char in charsused
                                  if char not in available)
            # Fonts missing maxmissing or more characters are not listed at
            # all; neither are fonts that miss nothing.
            if 0 < len(missingchars) < maxmissing:
                listing = " ".join("%s (u+%X)" % (c, ord(c)) for c in missingchars)
                print(" %s = %s" % (fontname, listing))
| 73 | |
| 74 | |
if __name__ == '__main__':
    # Route everything printed below through an encoder for the locale's
    # preferred encoding.
    preferred = locale.getpreferredencoding()
    sys.stdout = codecs.getwriter(preferred)(sys.stdout)

    # Gather the characters each font provides, then each language's usage.
    fontstats = dict((font, charsavailable(font)) for font in fonts())
    langusage = dict((lang, charusage(lang)) for lang in langs())

    # A first argument of "missing" selects the missing-characters report;
    # anything else (or no argument) selects the coverage summary.
    wants_missing = sys.argv[1:2] == ['missing']
    report = generate_missing if wants_missing else generate_summary
    report(fontstats, langusage)