parse_tencent_smiley.py 2.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. #!/usr/bin/env python3
  2. # -*- coding: UTF-8 -*-
  3. import xml.etree.ElementTree as ET
  4. import struct
  5. import os
  6. import json
  7. def parse_smiley_xml():
  8. ret = {}
  9. tree = ET.parse('smiley.xml')
  10. root = tree.getroot()
  11. for child in root:
  12. name = child.attrib['name']
  13. if 'smiley_values' not in name:
  14. continue
  15. if '_th' in name:
  16. continue # ignore thailand language
  17. lst = [c.text for c in child]
  18. assert len(lst) == 105
  19. for idx, v in enumerate(lst):
  20. if type(v) == str:
  21. # two code appears in the xml.. don't know why
  22. v = v.strip('"')
  23. v = v.replace('&lt;', '<')
  24. v = v.replace('&amp;', '&')
  25. ret[v] = os.path.join("smileys", f"{idx}.png")
  26. return ret
  27. def parse_extra_smiley():
  28. # some extra smiley from javascript on wx.qq.com
  29. with open("tencent-smiley-extra.json") as f:
  30. obj = json.load(f)
  31. extra = {'[' + k + ']': os.path.join("smileys", f"{v}.png") for k, v in obj.items()}
  32. return extra
  33. def parse_new_emoji():
  34. ret = {}
  35. xmlfile = "newemoji/newemoji-config.xml"
  36. tree = ET.parse(xmlfile)
  37. root = tree.getroot()
  38. for child in root:
  39. assert child.tag == "emoji"
  40. vals = {k.tag: k.text for k in child}
  41. filename = os.path.join("newemoji", vals["fileName"])
  42. for k, v in vals.items():
  43. if "-value" in k:
  44. ret[v] = filename
  45. return ret
  46. def parse_unicode_smiley():
  47. # 1f35c -> "\ue340"
  48. #self.unicode_smiley_code = gUnicodeCodeMap
  49. # u'\U0001f35c' -> "e340" # for iphone
  50. # u'\ue415' -> 'e415' # for android
  51. def unichar(i):
  52. try:
  53. return chr(i)
  54. except ValueError:
  55. return struct.pack('i', i).decode('utf-32')
  56. ret = {}
  57. with open("unicode-smiley.json") as f:
  58. d = json.load(f)
  59. for k, v in d.items():
  60. fname = os.path.join("smileys", hex(ord(v))[2:] + ".png")
  61. ret[unichar(int(k, 16))] = fname
  62. ret[v] = fname
  63. return ret
  64. if __name__ == "__main__":
  65. # parse old smileys
  66. smileys = {}
  67. def add(dic, name):
  68. smileys.update(dic)
  69. print(f"Found {len(dic)} smileys from {name}. Total is {len(smileys)}")
  70. add(parse_smiley_xml(), "smiley.xml")
  71. add(parse_extra_smiley(), "tencent-smiley-extra.json")
  72. add(parse_new_emoji(), "newemoji")
  73. add(parse_unicode_smiley(), "unicode-smiley.json")
  74. with open('tencent-smiley.json', 'w') as f:
  75. json.dump(smileys, f, indent=2)