|
@@ -9,6 +9,7 @@
|
|
|
######################################################
|
|
|
|
|
|
import argparse
|
|
|
+import random
|
|
|
import re
|
|
|
import requests
|
|
|
import sys
|
|
@@ -37,12 +38,15 @@ else:
|
|
|
return x
|
|
|
|
|
|
SEARCH_URL = 'https://www.google.com/search?q=site:stackoverflow.com%20{0}'
|
|
|
-USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17'
|
|
|
+USER_AGENTS = ['Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
|
|
|
+ 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
|
|
|
+ 'Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.46 Safari/536.5',
|
|
|
+ 'Opera/9.80 (Macintosh; Intel Mac OS X 10.7.4; U; en) Presto/2.10.229 Version/11.62']
|
|
|
ANSWER_HEADER = u('--- Answer {0} ---\n{1}')
|
|
|
NO_ANSWER_MSG = '< no answer given >'
|
|
|
|
|
|
def get_result(url):
|
|
|
- return requests.get(url, headers={'User-Agent': USER_AGENT}).text
|
|
|
+ return requests.get(url, headers={'User-Agent': random.choice(USER_AGENTS)}).text
|
|
|
|
|
|
|
|
|
def is_question(link):
|
|
@@ -53,7 +57,8 @@ def get_links(query):
|
|
|
url = SEARCH_URL.format(url_quote(query))
|
|
|
result = get_result(url)
|
|
|
html = pq(result)
|
|
|
- return [a.attrib['href'] for a in html('.l')]
|
|
|
+ return [a.attrib['href'] for a in html('.l')] or \
|
|
|
+ [a.attrib['href'] for a in html('.r')('a')]
|
|
|
|
|
|
|
|
|
def get_link_at_pos(links, pos):
|