Browse Source

msg to libchat seems to work

ppwwyyxx 10 years ago
parent
commit
24b1ad56e8
6 changed files with 173 additions and 16 deletions
  1. 0 0
      libchat/__init__.py
  2. 65 3
      libchat/libchat.py
  3. 92 0
      wechat/libchathelper.py
  4. 7 1
      wechat/msg.py
  5. 4 5
      wechat/render.py
  6. 5 7
      wechat/res.py

+ 0 - 0
libchat/__init__.py


+ 65 - 3
libchat/libchat.py

@@ -1,28 +1,90 @@
 #!/usr/bin/env python2
 # -*- coding: UTF-8 -*-
 # File: libchat.py
-# Date: Wed Mar 25 16:43:40 2015 +0800
+# Date: Wed Mar 25 22:46:51 2015 +0800
 # Author: Yuxin Wu <ppwwyyxxc@gmail.com>
 import sqlite3
 import os
+from collections import namedtuple
+
+# Maps the name of a chat source to the integer stored in the 'source' column.
+SOURCE_ID = {'wechat': 0}
+# Number of fields of a message; used to build the INSERT placeholder list.
+NUM_FIELDS = 8
+# Plain record for one message. The field order is the column order of the
+# 'message' table, since rows are inserted positionally.
+ChatMsgBase = namedtuple('ChatMsgBase',
+          ['source', 'time', 'sender', 'chatroom',
+           'text', 'image', 'sound', 'extra_data'])
+class ChatMsg(ChatMsgBase):
+    def __repr__(self):
+        return "Msg@{}/{}-{}/{}/{}/{}/{}".format(
+            self.time, self.sender, self.chatroom,
+            self.text.encode('utf-8'), 'IMG' if self.image else '',
+            'AUD' if self.sound else '', self.extra_data)
 
 class SqliteLibChat(object):
+    """ Interface for interacting with LibChat database"""
 
     def __init__(self, db_file):
         self.db_file = db_file
         self.conn = sqlite3.connect(db_file)
+        self.c = self.conn.cursor()
 
     def create(self):
-        c = self.conn.cursor()
-        c.execute("""
+        self.c.execute("""
           CREATE TABLE message (
           source SMALLINT,
           time INTEGER,
           sender TEXT,
           chatroom TEXT,
+          text TEXT,
           image COLLATE BINARY,
           sound COLLATE BINARY,
           extra_data COLLATE BINARY
          )
           """)
         self.conn.commit()
+
+    def _add_msg(self, tp):
+        assert isinstance(tp, ChatMsg)
+        self.c.execute(
+          """INSERT INTO message VALUES ({0})""".format(
+              ','.join(['?']*NUM_FIELDS)), tp)
+
+    def add_msgs(self, msgs):
+        self.c = self.conn.cursor()
+        for m in msgs:
+            self._add_msg(SqliteLibChat.prefilter(m))
+            self.conn.commit()
+
+    @staticmethod
+    def prefilter(msg):
+        source = msg.source
+        if isinstance(source, basestring):
+            source = SOURCE_ID[source]
+        return ChatMsg(source, *msg[1:])
+
+
+    def iterate_all_msg(self, predicate=None):
+        """ predicate: a dict used as SELECT filter
+            return a generator for all messages
+        """
+        if predicate is None:
+            self.c.execute("SELECT * FROM message")
+        else:
+            self.c.execute("SELECT * FROM message WHERE {}".format(
+                ' AND '.join(["{} = {}".format(k, v)
+                              for k, v in predicate.iteritems()])))
+        for row in self.c.fetchall():
+            yield ChatMsg(
+                *row[:5],
+                image=str(row[5]),
+                sound=str(row[6]),
+                extra_data=str(row[7])
+            )   # use str to get raw bytes
+
+
+if __name__ == '__main__':
+    msg = ChatMsg(-1, 1000, 'me', 'room', 'hello', '\x01\x02\x03', '', '')
+    db = SqliteLibChat('./message.db')
+    #db.add_msgs([msg])
+    for k in db.get_all_msg():
+        print k
+

+ 92 - 0
wechat/libchathelper.py

@@ -0,0 +1,92 @@
+#!/usr/bin/env python2
+# -*- coding: UTF-8 -*-
+# File: libchathelper.py
+# Date: Wed Mar 25 23:01:59 2015 +0800
+# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
+
+import base64
+from pyquery import PyQuery
+import logging
+import json
+logger = logging.getLogger(__name__)
+
+from libchat.libchat import SqliteLibChat, ChatMsg
+from .msg import *
+
+class LibChatHelper(object):
+    """ Build LibChat messages from WeChat Msg"""
+
+    FullyParsed = [TYPE_MSG, TYPE_SPEAK, TYPE_EMOJI,
+                    TYPE_CUSTOM_EMOJI, TYPE_IMG]
+
+    def __init__(self, parser, res):
+        """ res: a 'Resource' instance
+            parser: a 'WeChatDBParser' instance
+        """
+        self.parser = parser
+        self.res = res
+
+    def _get_image(self, msg):
+        """ get image content and type from a message"""
+        if msg.type == TYPE_IMG:
+            # imgPath was original THUMBNAIL_DIRPATH://th_xxxxxxxxx
+            imgpath = msg.imgPath.split('_')[-1]
+            if not imgpath:
+                logger.warn(
+                    'No imgpath in an image message. Perhaps a bug in wechat: {}'.format(msg))
+                return '', ''
+            bigimgpath = self.parser.imginfo.get(msg.msgSvrId)
+            img = self.res.get_img([imgpath, bigimgpath])
+            if not img:
+                logger.warn("No image found for {}".format(imgpath))
+            return img, 'jpeg'
+        elif msg.type == TYPE_EMOJI:
+            md5 = msg.imgPath
+            if md5 in self.parser.internal_emojis:
+                emoji_img, format = self.res.get_internal_emoji(
+                    self.parser.internal_emojis[md5])
+            else:
+                if md5 in self.parser.emojis:
+                    group, _ = self.parser.emojis[md5]
+                else:
+                    group = None
+                emoji_img, format = self.res.get_emoji(md5, group)
+            return emoji_img, format
+        elif msg.type == TYPE_CUSTOM_EMOJI:
+            pq = PyQuery(msg.content)
+            md5 = pq('emoticonmd5').text()
+            img, format = self.res.get_emoji(md5, None)
+            return img, format
+        else:
+            return '', ''
+
+    def _get_sound(self, msg):
+        if msg.type == TYPE_SPEAK:
+            audio_str, duration = self.res.get_voice_mp3(msg.imgPath)
+            return '{}:{}'.format(duration, audio_str)
+        return ''
+
+    def _get_extra(self, msg):
+        ret = {}
+        ret['type'] = msg.type
+        if msg.type not in LibChatHelper.FullyParsed:
+            ret['content'] = msg.content
+        return json.dumps(ret)
+
+    def _convert_msg(self, msg):
+        sender = 'me' if msg.isSend else msg.get_msg_talker_id()
+        chatroom = msg.get_chatroom()
+        text = msg.content_no_first_line if msg.type == TYPE_MSG else ''
+        img, format = self._get_image(msg)
+        if img:
+            # TODO don't use b64, directly return image content
+            img = base64.b64decode(img)
+# TODO do we need to save format?
+        sound = self._get_sound(msg)
+        extra = self._get_extra(msg)
+        return ChatMsg(
+            'wechat', msg.createTime, sender, chatroom,
+            text, img, sound, extra)
+
+    def convert_msgs(self, msgs):
+        return [self._convert_msg(m) for m in msgs]

+ 7 - 1
wechat/msg.py

@@ -1,7 +1,7 @@
 #!/usr/bin/env python2
 # -*- coding: UTF-8 -*-
 # File: msg.py
-# Date: Fri Jan 09 22:14:53 2015 +0800
+# Date: Wed Mar 25 22:27:58 2015 +0800
 # Author: Yuxin Wu <ppwwyyxxc@gmail.com>
 TYPE_MSG = 1
 TYPE_IMG = 3
@@ -139,6 +139,12 @@ class WeChatMsg(object):
             return self.talker
         return self.content[:self.content.find(':')]
 
+    def get_chatroom(self):
+        if self.is_chatroom():
+            return self.talker[:-9]
+        else:
+            return ''
+
     def get_emoji_product_id(self):
         assert self.type == TYPE_EMOJI, "Wrong call to get_emoji_product_id()!"
         pq = PyQuery(self.content)

+ 4 - 5
wechat/render.py

@@ -1,14 +1,14 @@
 #!/usr/bin/env python2
 # -*- coding: UTF-8 -*-
 # File: render.py
-# Date: Sun Feb 01 17:46:57 2015 +0800
+# Date: Wed Mar 25 22:24:53 2015 +0800
 # Author: Yuxin Wu <ppwwyyxxc@gmail.com>
 
 import os
 import base64
 import glob
-import logging
 from pyquery import PyQuery
+import logging
 logger = logging.getLogger(__name__)
 
 LIB_PATH = os.path.dirname(os.path.abspath(__file__))
@@ -113,11 +113,10 @@ class HTMLRender(object):
                 return fallback()
             bigimgpath = self.parser.imginfo.get(msg.msgSvrId)
             fnames = [k for k in [imgpath, bigimgpath] if k]
-            bigimg, smallimg = self.res.get_img(fnames)
-            if not smallimg:
+            img = self.res.get_img(fnames)
+            if not img:
                 logger.warn("No image thumbnail found for {}".format(imgpath))
                 return fallback()
-            img = bigimg if bigimg else smallimg
             # TODO do not show fancybox when no bigimg found
             format_dict['img'] = (img, 'jpeg')
             return template.format(**format_dict)

+ 5 - 7
wechat/res.py

@@ -1,7 +1,7 @@
 #!/usr/bin/env python2
 # -*- coding: UTF-8 -*-
 # File: res.py
-# Date: Sun Jan 11 23:38:31 2015 +0800
+# Date: Wed Mar 25 22:39:59 2015 +0800
 # Author: Yuxin Wu <ppwwyyxxc@gmail.com>
 
 import glob
@@ -57,6 +57,7 @@ class Resource(object):
         [check(k) for k in ['', AVATAR_DIRNAME, IMG_DIRNAME, EMOJI_DIRNAME, VOICE_DIRNAME]]
 
         self.res_dir = res_dir
+        self.voice_cache_idx = {}
         self.img_dir = os.path.join(res_dir, IMG_DIRNAME)
         self.voice_dir = os.path.join(res_dir, VOICE_DIRNAME)
         self.emoji_dir = os.path.join(res_dir, EMOJI_DIRNAME)
@@ -154,12 +155,9 @@ class Resource(object):
                 return base64.b64encode(buf.getvalue())
             return get_file_b64(img_file)
         big_file = get_jpg_b64(big_file)
-        small_file = get_jpg_b64(small_file)
-
-        if big_file and not small_file:
-            # TODO resize big to a thumbnail
-            pass
-        return (big_file, small_file)
+        if big_file:
+            return big_file
+        return get_jpg_b64(small_file)
 
     def get_emoji(self, md5, pack_id):
         path = self.emoji_dir