1
0

howdoi.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. #!/usr/bin/python
  2. ##################################################
  3. #
  4. # howdoi - a code search tool.
  5. # written by Benjamin Gleitzman (gleitz@mit.edu)
  6. # inspired by Rich Jones (rich@anomos.info)
  7. #
  8. ##################################################
  9. import urllib
  10. import urllib2
  11. import sys
  12. import json
  13. import argparse
  14. import re
  15. from pyquery import PyQuery as pq
  16. GOOGLE_SEARCH_URL = "https://www.google.com/search?q=site:stackoverflow.com%20{0}"
  17. DUCK_SEARCH_URL = "http://duckduckgo.com/html?q=site%3Astackoverflow.com%20{0}"
  18. USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17"
  19. def get_result(url):
  20. opener = urllib2.build_opener()
  21. opener.addheaders = [('User-agent', USER_AGENT)]
  22. result = opener.open(url)
  23. return result.read()
  24. def is_question(link):
  25. return re.search('questions/\d+/', link)
  26. def get_google_links(query):
  27. url = GOOGLE_SEARCH_URL.format(urllib.quote(query))
  28. result = get_result(url)
  29. html = pq(result)
  30. return [a.attrib['href'] for a in html('.l')]
  31. def get_duck_links(query):
  32. url = DUCK_SEARCH_URL.format(urllib.quote(query))
  33. result = get_result(url)
  34. html = pq(result)
  35. links = [l.find('a').attrib['href'] for l in html('.links_main')]
  36. def get_link_at_pos(links, pos):
  37. pos = int(args['pos']) - 1
  38. for link in links:
  39. if is_question(link):
  40. if pos == 0:
  41. break
  42. else:
  43. pos = pos - 1
  44. continue
  45. return link
  46. def get_instructions(args):
  47. links = get_google_links(args['query'])
  48. if not links:
  49. return ''
  50. link = get_link_at_pos(links, args['pos'])
  51. if args.get('link'):
  52. return '> ' + link
  53. link = link + '?answertab=votes'
  54. page = get_result(link)
  55. html = pq(page)
  56. first_answer = html('.answer').eq(0)
  57. instructions = first_answer.find('pre') or first_answer.find('code')
  58. if args['full'] or not instructions:
  59. text = first_answer.find('.post-text').eq(0).text()
  60. else:
  61. text = instructions.eq(0).text()
  62. text = '> ' + text
  63. text = text.replace('\n', '\n> ')
  64. return text
  65. def howdoi(args):
  66. args['query'] = ' '.join(args['query']).replace('?', '')
  67. instructions = get_instructions(args) or 'Sorry, couldn\'t find any help with that topic'
  68. print instructions
  69. if __name__ == '__main__':
  70. parser = argparse.ArgumentParser(description='code search tool')
  71. parser.add_argument('query', metavar='QUERY', type=str, nargs='+',
  72. help='the question to answer')
  73. parser.add_argument('-p','--pos', help='return answer in specified position (default: 1)', default=1)
  74. parser.add_argument('-f','--full', help='return the full text of the answer',
  75. action='store_true')
  76. parser.add_argument('-l','--link', help='display only the answer link',
  77. action='store_true')
  78. args = vars(parser.parse_args())
  79. howdoi(args)