# property_parser.py
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
  4. import re
  5. import codecs
  6. class MalformedLocaleFileError(Exception):
  7. pass
  8. def parse_file(path):
  9. return parse(read_file(path), path)
  10. def read_file(path):
  11. try:
  12. return codecs.open( path, "r", "utf-8" ).readlines()
  13. except UnicodeDecodeError, e:
  14. raise MalformedLocaleFileError(
  15. 'Following locale file is not a valid ' +
  16. 'UTF-8 file: %s\n%s"' % (path, str(e)))
  17. COMMENT = re.compile(r'\s*#')
  18. EMPTY = re.compile(r'^\s+$')
  19. KEYVALUE = re.compile(r"\s*([^=:]+)(=|:)\s*(.*)")
  20. def parse(lines, path=None):
  21. lines = iter(lines)
  22. lineNo = 1
  23. pairs = dict()
  24. for line in lines:
  25. if COMMENT.match(line) or EMPTY.match(line) or len(line) == 0:
  26. continue
  27. m = KEYVALUE.match(line)
  28. if not m:
  29. raise MalformedLocaleFileError(
  30. 'Following locale file is not a valid .properties file: %s\n'
  31. 'Line %d is incorrect:\n%s' % (path, lineNo, line))
  32. # All spaces are strip. Spaces at the beginning are stripped
  33. # by the regular expression. We have to strip spaces at the end.
  34. key = m.group(1).rstrip()
  35. val = m.group(3).rstrip()
  36. val = val.encode('raw-unicode-escape').decode('raw-unicode-escape')
  37. # `key` can be empty when key is only made of spaces
  38. if not key:
  39. raise MalformedLocaleFileError(
  40. 'Following locale file is not a valid .properties file: %s\n'
  41. 'Key is invalid on line %d is incorrect:\n%s' %
  42. (path, lineNo, line))
  43. # Multiline value: keep reading lines, while lines end with backslash
  44. # and strip spaces at the beginning of lines except the last line
  45. # that doesn't end up with backslash, we strip all spaces for this one.
  46. if val.endswith("\\"):
  47. val = val[:-1]
  48. try:
  49. # remove spaces before/after and especially the \n at EOL
  50. line = lines.next().strip()
  51. while line.endswith("\\"):
  52. val += line[:-1].lstrip()
  53. line = lines.next()
  54. lineNo += 1
  55. val += line.strip()
  56. except StopIteration:
  57. raise MalformedLocaleFileError(
  58. 'Following locale file is not a valid .properties file: %s\n'
  59. 'Unexpected EOF in multiline sequence at line %d:\n%s' %
  60. (path, lineNo, line))
  61. # Save this new pair
  62. pairs[key] = val
  63. lineNo += 1
  64. normalize_plural(path, pairs)
  65. return pairs
  66. # Plural forms in properties files are defined like this:
  67. # key = other form
  68. # key[one] = one form
  69. # key[...] = ...
  70. # Parse them and merge each key into one object containing all forms:
  71. # key: {
  72. # other: "other form",
  73. # one: "one form",
  74. # ...: ...
  75. # }
  76. PLURAL_FORM = re.compile(r'^(.*)\[(zero|one|two|few|many|other)\]$')
  77. def normalize_plural(path, pairs):
  78. for key in list(pairs.keys()):
  79. m = PLURAL_FORM.match(key)
  80. if not m:
  81. continue
  82. main_key = m.group(1)
  83. plural_form = m.group(2)
  84. # Allows not specifying a generic key (i.e a key without [form])
  85. if not main_key in pairs:
  86. pairs[main_key] = {}
  87. # Ensure that we always have the [other] form
  88. if not main_key + "[other]" in pairs:
  89. raise MalformedLocaleFileError(
  90. 'Following locale file is not a valid UTF-8 file: %s\n'
  91. 'This plural form doesn\'t have a matching `%s[other]` form:\n'
  92. '%s\n'
  93. 'You have to defined following key:\n%s'
  94. % (path, main_key, key, main_key))
  95. # convert generic form into an object if it is still a string
  96. if isinstance(pairs[main_key], unicode):
  97. pairs[main_key] = {"other": pairs[main_key]}
  98. # then, add this new plural form
  99. pairs[main_key][plural_form] = pairs[key]
  100. del pairs[key]