render.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. #!/usr/bin/env python3
  2. # -*- coding: UTF-8 -*-
import glob
import html
import json
import logging
import os
from collections import Counter
from functools import lru_cache

from pyquery import PyQuery
  9. logger = logging.getLogger(__name__)
  10. LIB_PATH = os.path.dirname(os.path.abspath(__file__))
  11. STATIC_PATH = os.path.join(LIB_PATH, 'static')
  12. HTML_FILE = os.path.join(STATIC_PATH, 'TP_INDEX.html')
  13. TIME_HTML_FILE = os.path.join(STATIC_PATH, 'TP_TIME.html')
  14. FRIEND_AVATAR_CSS_FILE = os.path.join(STATIC_PATH, 'avatar.css.tpl')
  15. try:
  16. from csscompressor import compress as css_compress
  17. except ImportError:
  18. css_compress = lambda x: x
  19. from .msg import *
  20. from .common.textutil import get_file_b64
  21. from .common.progress import ProgressReporter
  22. from .common.timer import timing
  23. from .smiley import SmileyProvider
  24. from .msgslice import MessageSlicerByTime, MessageSlicerBySize
  25. TEMPLATES_FILES = {TYPE_MSG: "TP_MSG",
  26. TYPE_IMG: "TP_IMG",
  27. TYPE_SPEAK: "TP_SPEAK",
  28. TYPE_EMOJI: "TP_EMOJI",
  29. TYPE_CUSTOM_EMOJI: "TP_EMOJI",
  30. TYPE_LINK: "TP_MSG",
  31. TYPE_VIDEO_FILE: "TP_VIDEO_FILE",
  32. TYPE_QQMUSIC: "TP_QQMUSIC",
  33. }
  34. @lru_cache()
  35. def get_template(name: str | int) -> str | None:
  36. """Return the html template given a file name or msg type."""
  37. if isinstance(name, int):
  38. name = TEMPLATES_FILES.get(name, None)
  39. if name is None:
  40. return None
  41. html_path = os.path.join(STATIC_PATH, f"{name}.html")
  42. with open(html_path) as f:
  43. return f.read()
  44. class HTMLRender(object):
  45. def __init__(self, parser, res=None):
  46. with open(HTML_FILE) as f:
  47. self.html = f.read()
  48. with open(TIME_HTML_FILE) as f:
  49. self.time_html = f.read()
  50. self.parser = parser
  51. self.res = res
  52. assert self.res is not None, \
  53. "Resource Directory not given. Cannot render HTML."
  54. self.smiley = SmileyProvider()
  55. css_files = glob.glob(os.path.join(LIB_PATH, 'static/*.css'))
  56. self.css_string = [] # css to add
  57. for css in css_files:
  58. logger.info("Loading {}".format(os.path.basename(css)))
  59. with open(css) as f:
  60. self.css_string.append(f.read())
  61. js_files = glob.glob(os.path.join(LIB_PATH, 'static/*.js'))
  62. # to load jquery before other js
  63. js_files = sorted(js_files, key=lambda f: 'jquery-latest' in f, reverse=True)
  64. self.js_string = []
  65. for js in js_files:
  66. logger.info("Loading {}".format(os.path.basename(js)))
  67. with open(js) as f:
  68. self.js_string.append(f.read())
  69. self.unknown_type_cnt = Counter()
  70. @property
  71. def all_css(self):
  72. # call after processing all messages,
  73. # because smiley css need to be included only when necessary
  74. def process(css):
  75. css = css_compress(css)
  76. return u'<style type="text/css">{}</style>'.format(css)
  77. if hasattr(self, 'final_css'):
  78. return self.final_css + process(self.smiley.gen_used_smiley_css())
  79. self.final_css = u"\n".join(map(process, self.css_string))
  80. return self.final_css + process(self.smiley.gen_used_smiley_css())
  81. @property
  82. def all_js(self):
  83. if hasattr(self, 'final_js'):
  84. return self.final_js
  85. def process(js):
  86. # TODO: add js compress
  87. return u'<script type="text/javascript">{}</script>'.format(js)
  88. self.final_js = u"\n".join(map(process, self.js_string))
  89. return self.final_js
  90. #@timing(total=True)
  91. def render_msg(self, msg: WeChatMsg):
  92. """ render a message, return the html block"""
  93. # TODO for chatroom, add nickname on avatar
  94. sender = u'you ' + msg.talker if not msg.isSend else 'me'
  95. format_dict = {'sender_label': sender,
  96. 'time': msg.createTime }
  97. if not msg.known_type:
  98. self.unknown_type_cnt[msg.type] += 1
  99. if(not msg.isSend and msg.is_chatroom()):
  100. format_dict['nickname'] = '>\n <pre align=\'left\'>'+msg.talker_nickname+'</pre'
  101. else:
  102. format_dict['nickname'] = ' '
  103. def fallback():
  104. template = get_template(TYPE_MSG)
  105. content = msg.msg_str()
  106. content = self.smiley.replace_smileycode(content)
  107. if not msg.known_type:
  108. # Show raw (usually xml) content if unknown.
  109. content = html.escape(content)
  110. return template.format(content=content, **format_dict)
  111. template = get_template(msg.type)
  112. if msg.type == TYPE_SPEAK:
  113. audio_str, duration = self.res.get_voice_mp3(msg.imgPath)
  114. format_dict['voice_duration'] = duration
  115. format_dict['voice_str'] = audio_str
  116. return template.format(**format_dict)
  117. elif msg.type == TYPE_IMG:
  118. # imgPath was original THUMBNAIL_DIRPATH://th_xxxxxxxxx
  119. imgpath = msg.imgPath.split('_')[-1]
  120. if not imgpath:
  121. logger.warn('No imgpath in an image message. Perhaps a bug in wechat.')
  122. return fallback()
  123. bigimgpath = self.parser.imginfo.get(msg.msgSvrId)
  124. fnames = [k for k in [imgpath, bigimgpath] if k]
  125. img = self.res.get_img(fnames)
  126. if not img:
  127. logger.warn("No image thumbnail found for {}".format(imgpath))
  128. return fallback()
  129. # TODO do not show fancybox when no bigimg found
  130. format_dict['img'] = (img, 'jpeg')
  131. return template.format(**format_dict)
  132. elif msg.type == TYPE_QQMUSIC:
  133. jobj = json.loads(msg.msg_str())
  134. content = f"{jobj['title']} - {jobj['singer']}"
  135. if msg.imgPath is not None:
  136. # imgPath was original THUMBNAIL_DIRPATH://th_xxxxxxxxx
  137. imgpath = msg.imgPath.split('_')[-1]
  138. img = self.res.get_img([imgpath])
  139. format_dict['img'] = (img, 'jpeg')
  140. else:
  141. template = get_template("TP_QQMUSIC_NOIMG")
  142. return template.format(url=jobj['url'], content=content, **format_dict)
  143. elif msg.type == TYPE_EMOJI or msg.type == TYPE_CUSTOM_EMOJI:
  144. if 'emoticonmd5' in msg.content:
  145. pq = PyQuery(msg.content)
  146. md5 = pq('emoticonmd5').text()
  147. else:
  148. md5 = msg.imgPath
  149. # TODO md5 could exist in both.
  150. # first is emoji md5, second is image2/ md5
  151. # can use fallback here.
  152. if md5:
  153. emoji_img, format = self.res.get_emoji_by_md5(md5)
  154. format_dict['emoji_format'] = format
  155. format_dict['emoji_img'] = emoji_img
  156. else:
  157. import IPython as IP; IP.embed()
  158. return template.format(**format_dict)
  159. elif msg.type == TYPE_LINK:
  160. pq = PyQuery(msg.content_xml_ready)
  161. url = pq('url').text()
  162. if url:
  163. title = pq('title')[0].text
  164. content = '<a target="_blank" href="{0}">{1}</a>'.format(url, title)
  165. format_dict['content'] = content
  166. return template.format(**format_dict)
  167. elif msg.type == TYPE_VIDEO_FILE:
  168. video = self.res.get_video(msg.imgPath)
  169. if video is None:
  170. logger.warning(f"Cannot find video {msg.imgPath} ({msg.createTime})")
  171. # fallback
  172. format_dict['content'] = f"VIDEO FILE {msg.imgPath}"
  173. return get_template(TYPE_MSG).format(**format_dict)
  174. elif video.endswith(".mp4"):
  175. video_str = get_file_b64(video)
  176. format_dict["video_str"] = video_str
  177. return template.format(**format_dict)
  178. elif video.endswith(".jpg"):
  179. # only has thumbnail
  180. image_str = get_file_b64(video)
  181. format_dict["img"] = (image_str, 'jpeg')
  182. return get_template(TYPE_IMG).format(**format_dict)
  183. elif msg.type == TYPE_WX_VIDEO:
  184. # TODO: fetch video from resource
  185. return fallback()
  186. return fallback()
  187. def _render_partial_msgs(self, msgs):
  188. """ return single html"""
  189. self.smiley.reset()
  190. slicer = MessageSlicerByTime()
  191. slices = slicer.slice(msgs)
  192. blocks = []
  193. for idx, slice in enumerate(slices):
  194. nowtime = slice[0].createTime
  195. if idx == 0 or \
  196. slices[idx - 1][0].createTime.date() != nowtime.date():
  197. timestr = nowtime.strftime("%m/%d %H:%M:%S")
  198. else:
  199. timestr = nowtime.strftime("%H:%M:%S")
  200. blocks.append(self.time_html.format(time=timestr))
  201. blocks.extend([self.render_msg(m) for m in slice])
  202. self.prgs.trigger(len(slice))
  203. # string operation is extremely slow
  204. return self.html.format(extra_css=self.all_css,
  205. extra_js=self.all_js,
  206. chat=msgs[0].chat_nickname,
  207. messages=u''.join(blocks)
  208. )
  209. def prepare_avatar_css(self, talkers):
  210. with open(FRIEND_AVATAR_CSS_FILE) as f:
  211. avatar_tpl = f.read()
  212. my_avatar = self.res.get_avatar(self.parser.username)
  213. css = avatar_tpl.format(name='me', avatar=my_avatar)
  214. for talker in talkers:
  215. avatar = self.res.get_avatar(talker)
  216. css += avatar_tpl.format(name=talker, avatar=avatar)
  217. self.css_string.append(css)
  218. def render_msgs(self, msgs):
  219. """ render msgs of one chat, return a list of html"""
  220. if msgs[0].is_chatroom():
  221. talkers = set([m.talker for m in msgs])
  222. else:
  223. talkers = set([msgs[0].talker])
  224. self.prepare_avatar_css(talkers)
  225. self.res.cache_voice_mp3(msgs)
  226. chat = msgs[0].chat_nickname
  227. logger.info(u"Rendering {} messages of {}".format(
  228. len(msgs), chat))
  229. self.prgs = ProgressReporter("Render", total=len(msgs))
  230. slice_by_size = MessageSlicerBySize().slice(msgs)
  231. ret = [self._render_partial_msgs(s) for s in slice_by_size]
  232. self.prgs.finish()
  233. logger.warning("[HTMLRenderer] Unhandled messages (type->cnt): {}".format(self.unknown_type_cnt))
  234. return ret
  235. if __name__ == '__main__':
  236. r = HTMLRender()
  237. with open('/tmp/a.html', 'w') as f:
  238. print >> f, r.html.format(style=r.css, talker='talker',
  239. messages='haha')