plural-rules-generator.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. # This Source Code Form is subject to the terms of the Mozilla Public
  2. # License, v. 2.0. If a copy of the MPL was not distributed with this
  3. # file, You can obtain one at http://mozilla.org/MPL/2.0/.
  4. # Program used to generate /packages/api-utils/lib/l10n/plural-rules.js
  5. # Fetch unicode.org data in order to build functions specific to each language
  6. # that will return for a given integer, its plural form name.
  7. # Plural form names are: zero, one, two, few, many, other.
  8. #
  9. # More information here:
  10. # http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
  11. # http://cldr.unicode.org/index/cldr-spec/plural-rules
  12. # Usage:
  13. # $ python plural-rules-generator.py > ../packages/api-utils/lib/l10n/plural-rules.js
  14. import urllib2
  15. import xml.dom.minidom
  16. import json
  17. import re
  # When True, computeRules() emits each original unicode.org condition as a
  # `// ...` comment line right before the JS `if` generated from it.
  18. PRINT_CONDITIONS_IN_COMMENTS = False
  # Location of the plural rules XML document; it is fetched by computeRules()
  # and also written into the header of the generated JS file.
  19. UNICODE_ORG_XML_URL = "http://unicode.org/repos/cldr/trunk/common/supplemental/plurals.xml"
  # Matches one elementary condition of the unicode.org pseudo language.
  # Capture groups, as consumed by parseCondition():
  #   1: optional " mod N" applied to `n`
  #   2: operator: "is", "in", "within" or "not in"
  #   3: set only when the operator is "not in" (nested group)
  #   4: optional " not" following the operator (e.g. "is not")
  #   5: right-hand value, i.e. everything up to the next whitespace
  20. CONDITION_RE = r'n( mod \d+)? (is|in|within|(not in))( not)? ([^\s]+)'
  21. # For a given regexp.MatchObject `g` for `CONDITION_RE`,
  22. # returns the equivalent JS piece of code
  23. # i.e. maps pseudo conditional language from unicode.org XML to JS code
  24. def parseCondition(g):
  25. lvalue = "n"
  26. if g.group(1):
  27. lvalue = "(n %% %d)" % int(g.group(1).replace("mod ", ""))
  28. operator = g.group(2)
  29. if g.group(4):
  30. operator += " not"
  31. rvalue = g.group(5)
  32. if operator == "is":
  33. return "%s == %s" % (lvalue, rvalue)
  34. if operator == "is not":
  35. return "%s != %s" % (lvalue, rvalue)
  36. # "in", "within" or "not in" case:
  37. notPrefix = ""
  38. if operator == "not in":
  39. notPrefix = "!"
  40. # `rvalue` is a comma seperated list of either:
  41. # - numbers: 42
  42. # - ranges: 42..72
  43. sections = rvalue.split(',')
  44. if ".." not in rvalue:
  45. # If we don't have range, but only a list of integer,
  46. # we can simplify the generated code by using `isIn`
  47. # n in 1,3,6,42
  48. return "%sisIn(%s, [%s])" % (notPrefix, lvalue, ", ".join(sections))
  49. # n in 1..42
  50. # n in 1..3,42
  51. subCondition = []
  52. integers = []
  53. for sub in sections:
  54. if ".." in sub:
  55. left, right = sub.split("..")
  56. subCondition.append("isBetween(%s, %d, %d)" % (
  57. lvalue,
  58. int(left),
  59. int(right)
  60. ))
  61. else:
  62. integers.append(int(sub))
  63. if len(integers) > 1:
  64. subCondition.append("isIn(%s, [%s])" % (lvalue, ", ".join(integers)))
  65. elif len(integers) == 1:
  66. subCondition.append("(%s == %s)" % (lvalue, integers[0]))
  67. return "%s(%s)" % (notPrefix, " || ".join(subCondition))
  68. def computeRules():
  69. # Fetch plural rules data directly from unicode.org website:
  70. url = UNICODE_ORG_XML_URL
  71. f = urllib2.urlopen(url)
  72. doc = xml.dom.minidom.parse(f)
  73. # Read XML document and extract locale to rules mapping
  74. localesMapping = {}
  75. algorithms = {}
  76. for index,pluralRules in enumerate(doc.getElementsByTagName("pluralRules")):
  77. if not index in algorithms:
  78. algorithms[index] = {}
  79. for locale in pluralRules.getAttribute("locales").split():
  80. localesMapping[locale] = index
  81. for rule in pluralRules.childNodes:
  82. if rule.nodeType != rule.ELEMENT_NODE or rule.tagName != "pluralRule":
  83. continue
  84. pluralForm = rule.getAttribute("count")
  85. algorithm = rule.firstChild.nodeValue
  86. algorithms[index][pluralForm] = algorithm
  87. # Go through all rules and compute a Javascript code for each of them
  88. rules = {}
  89. for index,rule in algorithms.iteritems():
  90. lines = []
  91. for pluralForm in rule:
  92. condition = rule[pluralForm]
  93. originalCondition = str(condition)
  94. # Convert pseudo language to JS code
  95. condition = rule[pluralForm].lower()
  96. condition = re.sub(CONDITION_RE, parseCondition, condition)
  97. condition = re.sub(r'or', "||", condition)
  98. condition = re.sub(r'and', "&&", condition)
  99. # Prints original condition in unicode.org pseudo language
  100. if PRINT_CONDITIONS_IN_COMMENTS:
  101. lines.append( '// %s' % originalCondition )
  102. lines.append( 'if (%s)' % condition )
  103. lines.append( ' return "%s";' % pluralForm )
  104. rules[index] = "\n ".join(lines)
  105. return localesMapping, rules
  106. localesMapping, rules = computeRules()
  107. rulesLines = []
  108. for index in rules:
  109. lines = rules[index]
  110. rulesLines.append('"%d": function (n) {' % index)
  111. rulesLines.append(' %s' % lines)
  112. rulesLines.append(' return "other"')
  113. rulesLines.append('},')
  114. print """/* This Source Code Form is subject to the terms of the Mozilla Public
  115. * License, v. 2.0. If a copy of the MPL was not distributed with this
  116. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  117. // This file is automatically generated with /python-lib/plural-rules-generator.py
  118. // Fetching data from: %s
  119. // Mapping of short locale name == to == > rule index in following list
  120. const LOCALES_TO_RULES = %s;
  121. // Utility functions for plural rules methods
  122. function isIn(n, list) list.indexOf(n) !== -1;
  123. function isBetween(n, start, end) start <= n && n <= end;
  124. // List of all plural rules methods, that maps an integer to the plural form name to use
  125. const RULES = {
  126. %s
  127. };
  128. /**
  129. * Return a function that gives the plural form name for a given integer
  130. * for the specified `locale`
  131. * let fun = getRulesForLocale('en');
  132. * fun(1) -> 'one'
  133. * fun(0) -> 'other'
  134. * fun(1000) -> 'other'
  135. */
  136. exports.getRulesForLocale = function getRulesForLocale(locale) {
  137. let index = LOCALES_TO_RULES[locale];
  138. if (!(index in RULES)) {
  139. console.warn('Plural form unknown for locale "' + locale + '"');
  140. return function () { return "other"; };
  141. }
  142. return RULES[index];
  143. }
  144. """ % (UNICODE_ORG_XML_URL,
  145. json.dumps(localesMapping, sort_keys=True, indent=2),
  146. "\n ".join(rulesLines))