Browse Source

Use a straight Google search instead of the API. Added argparse for better help messages. Added the ability to return results at different positions, get the full answer, and get the link.

Benjamin Gleitzman 12 years ago
parent
commit
b387581bb3
3 changed files with 135 additions and 58 deletions
  1. 41 8
      README.md
  2. 0 50
      howdoi
  3. 94 0
      howdoi.py

+ 41 - 8
README.md

@@ -6,26 +6,59 @@ Are you a hack programmer? Do you find yourself constantly Googling for how to d
 Suppose you want to know how to format a date in bash. Why open your browser and read through blogs when you can just...
 
     $ howdoi format date bash
-    > date +"%m-%d-%y"
+    > [foo@bar ~]$date --date "2012-02-13" +%s
+    > 1329055200
+    > [foo@bar ~]$date --date @1329055200
+    > Mon Feb 13 00:00:00 EST 2012
+    > [foo@bar ~]$date --date @1329055200 +"%Y-%m-%d"
+    > 2012-02-13
+
+howdoi will answer all sorts of queries
 
     $ howdoi print stack trace python
+    > import traceback
+    >
+    > try:
+    >     1/0
+    > except:
+    >     print '>>> traceback <<<'
+    >     traceback.print_exc()
+    >     print '>>> end of traceback <<<'
     > traceback.print_exc()
 
-    $ howdoi create tar
+    $ howdoi convert mp4 to animated gif
+    > video=/path/to/video.avi
+    > outdir=/path/to/output.gif
+    > mplayer "$video" \
+    >         -ao null \
+    >         -ss "00:01:00" \  # starting point
+    >         -endpos 10 \ # duration in second
+    >         -vo gif89a:fps=13:output=$outdir \
+    >         -vf scale=240:180
+
+    $ howdoi create tar archive
     > tar -cf backup.tar --exclude "www/subf3" www
 
 Usage:
 
-`howdoi query`
+    usage: howdoi [-h] [-p POS] [-f] [-l] QUERY [QUERY ...]
+
+    code search tool
+
+    positional arguments:
+      QUERY              the question to answer
+
+    optional arguments:
+      -h, --help         show this help message and exit
+      -p POS, --pos POS  return answer in specified position (default: 1)
+      -f, --full         return the full text of the answer
+      -l, --link         display only the answer link
 
 Extra notes:
 
-*   Requires [BeautifulSoup](http://www.crummy.com/software/BeautifulSoup/)
+*   Requires [PyQuery](http://pypi.python.org/pypi/pyquery)
 *   Special thanks to Rich Jones ([@miserlou](https://github.com/miserlou)) for the idea
 
 TODOs:
 
-*   Pick the longest code block instead of the first
-*   Flag for printing multiple answers instead of the first
-*   Flag for displaying the entire answer
-*   Flag for retrieving the page URL
+*   Be able to pick StackOverflow result based on different criteria (active, oldest, etc.)

+ 0 - 50
howdoi

@@ -1,50 +0,0 @@
-#!/usr/bin/python
-
-##################################################
-#
-# howdoi - a code search tool.
-# written by Benjamin Gleitzman (gleitz@mit.edu)
-# inspired by Rich Jones (rich@anomos.info)
-#
-##################################################
-
-import urllib
-import urllib2
-import sys
-import json
-
-from BeautifulSoup import BeautifulSoup as bs
-
-# Google Custom Search endpoint; get_instructions() fills {0} with the
-# URL-quoted query and expects a JSON response (alt=json).
-# NOTE(review): the API key and search-engine id are hard-coded in this URL.
-SEARCH_URL = "https://www.googleapis.com/customsearch/v1?key=AIzaSyCo6SQ6XNvvS3fdJLcDNR4mpdIGGmVcXAk&cx=015163316206774170098:pj94ujarmcg&q={0}&alt=json"
-
-def get_result(url):
-    result = urllib2.urlopen(url)
-    return result.read()
-
-def get_instructions(query):
-    url = SEARCH_URL.format(urllib.quote(query))
-    result = get_result(url)
-    if not result:
-        return ''
-    else:
-        response = json.loads(result)
-        try:
-            link = response['items'][0]['link']
-            page = get_result(link)
-            soup = bs(page)
-            first_answer = soup.find("div", {"id": "answers"})
-            instructions = first_answer.find("code") or first_answer.find("pre")
-            return instructions.text
-        except:
-            return ''
-
-
-def howdoi(query):
-    instructions = get_instructions(query) or "Sorry, couldn't find any help with that topic"
-    print instructions
-
-if __name__ == "__main__":
-    if len(sys.argv) < 2:
-        print "USAGE: howdoi query (e.g. howdoi format date bash)"
-    else:
-        howdoi(" ".join(sys.argv[1:]))

+ 94 - 0
howdoi.py

@@ -0,0 +1,94 @@
+#!/usr/bin/python
+
+##################################################
+#
+# howdoi - a code search tool.
+# written by Benjamin Gleitzman (gleitz@mit.edu)
+# inspired by Rich Jones (rich@anomos.info)
+#
+##################################################
+
+import urllib
+import urllib2
+import sys
+import json
+import argparse
+import re
+
+from pyquery import PyQuery as pq
+
+# Search endpoints; {0} is filled with the URL-quoted query. Both URLs embed
+# a site:stackoverflow.com filter in the query string.
+GOOGLE_SEARCH_URL = "https://www.google.com/search?q=site:stackoverflow.com%20{0}"
+DUCK_SEARCH_URL = "http://duckduckgo.com/html?q=site%3Astackoverflow.com%20{0}"
+# Sent as the User-agent header on every request made by get_result().
+USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17"
+
+def get_result(url):
+    print url
+    opener = urllib2.build_opener()
+    opener.addheaders = [('User-agent', USER_AGENT)]
+    result = opener.open(url)
+    return result.read()
+
+def is_question(link):
+    return re.search('questions/\d+/', link)
+
+def get_google_links(query):
+    url = GOOGLE_SEARCH_URL.format(urllib.quote(query))
+    result = get_result(url)
+    html = pq(result)
+    return [a.attrib['href'] for a in html('.l')]
+
+def get_duck_links(query):
+    url = DUCK_SEARCH_URL.format(urllib.quote(query))
+    result = get_result(url)
+    html = pq(result)
+    links = [l.find('a').attrib['href'] for l in html('.links_main')]
+
+def get_link_at_pos(links, pos):
+    pos = int(args['pos']) - 1
+    for link in links:
+        if is_question(link):
+            if pos == 0:
+                break
+            else:
+                pos = pos - 1
+                continue
+    return link
+
+def get_instructions(args):
+    links = get_google_links(args['query'])
+    if not links:
+        return ''
+
+    link = get_link_at_pos(links, args['pos'])
+    if args.get('link'):
+        return '> ' + link
+
+    link = link + '?answertab=votes'
+    page = get_result(link)
+    html = pq(page)
+    first_answer = html('.answer').eq(0)
+    instructions = first_answer.find('pre') or first_answer.find('code')
+    if args['full'] or not instructions:
+        text = first_answer.find('.post-text').eq(0).text()
+    else:
+        text = instructions.eq(0).text()
+    text = '> ' + text
+    text = text.replace('\n', '\n> ')
+    return text
+
+def howdoi(args):
+    args['query'] = ' '.join(args['query']).replace('?', '')
+    instructions = get_instructions(args) or 'Sorry, couldn\'t find any help with that topic'
+    print instructions
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='code search tool')
+    parser.add_argument('query', metavar='QUERY', type=str, nargs='+',
+                        help='the question to answer')
+    parser.add_argument('-p','--pos', help='return answer in specified position (default: 1)', default=1)
+    parser.add_argument('-f','--full', help='return the full text of the answer',
+                        action='store_true')
+    parser.add_argument('-l','--link', help='display only the answer link',
+                        action='store_true')
+    args = vars(parser.parse_args())
+    howdoi(args)