howdoi.py 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834
  1. #!/usr/bin/env python
  2. ######################################################
  3. #
  4. # howdoi - instant coding answers via the command line
  5. # written by Benjamin Gleitzman (gleitz@mit.edu)
  6. # inspired by Rich Jones (rich@anomos.info)
  7. #
  8. ######################################################
  9. import gc
  10. gc.disable()
  11. import argparse
  12. import inspect
  13. import json
  14. import os
  15. import re
  16. import sys
  17. import textwrap
  18. from urllib.request import getproxies
  19. from urllib.parse import quote as url_quote, urlparse, parse_qs
  20. from multiprocessing import Pool
  21. import logging
  22. import appdirs
  23. import requests
  24. from cachelib import FileSystemCache, NullCache
  25. from keep import utils as keep_utils
  26. from pygments.lexers import guess_lexer, get_lexer_by_name
  27. from pygments.util import ClassNotFound
  28. from rich.syntax import Syntax
  29. from rich.console import Console
  30. from pyquery import PyQuery as pq
  31. from requests.exceptions import ConnectionError as RequestsConnectionError
  32. from requests.exceptions import SSLError
  33. from colorama import init
  34. init()
  35. from howdoi import __version__
  36. from howdoi.errors import GoogleValidationError, BingValidationError, DDGValidationError
  37. logging.basicConfig(format='%(levelname)s: %(message)s')
  38. if os.getenv('HOWDOI_DISABLE_SSL'): # Set http instead of https
  39. SCHEME = 'http://'
  40. VERIFY_SSL_CERTIFICATE = False
  41. else:
  42. SCHEME = 'https://'
  43. VERIFY_SSL_CERTIFICATE = True
  44. SUPPORTED_SEARCH_ENGINES = ('google', 'bing', 'duckduckgo')
  45. URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com'
  46. USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
  47. 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100 101 Firefox/22.0',
  48. 'Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0',
  49. ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) '
  50. 'Chrome/19.0.1084.46 Safari/536.5'),
  51. ('Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.46'
  52. 'Safari/536.5'),)
# Search URL templates keyed by engine name. They are filled with
# str.format: {0} is the site to search (URL) and {1} the URL-quoted query.
SEARCH_URLS = {
    'bing': SCHEME + 'www.bing.com/search?q=site:{0}%20{1}&hl=en',
    'google': SCHEME + 'www.google.com/search?q=site:{0}%20{1}&hl=en',
    'duckduckgo': SCHEME + 'duckduckgo.com/html?q=site:{0}%20{1}&t=hj&ia=web'
}

# Substrings whose presence in a search result page marks the request as blocked.
BLOCK_INDICATORS = (
    'form id="captcha-form"',
    'This page appears when Google automatically detects requests coming from your computer '
    'network which appear to be in violation of the <a href="//www.google.com/policies/terms/">Terms of Service'
)

# Links containing any of these fragments are never treated as question links.
BLOCKED_QUESTION_FRAGMENTS = (
    'webcache.googleusercontent.com',
)

STAR_HEADER = '\u2605'
# Header template: {0} = answer link, {1} = answer text, {2} = decoration (STAR_HEADER).
ANSWER_HEADER = '{2} Answer from {0} {2}\n{1}'
NO_ANSWER_MSG = '< no answer given >'

# Sentinel stored in the cache when a search produced no question links.
CACHE_EMPTY_VAL = "NULL"
CACHE_DIR = appdirs.user_cache_dir('howdoi')
CACHE_ENTRY_MAX = 128

HTML_CACHE_PATH = 'page_cache'
# Queries (compared lower-cased) that print usage instructions instead of searching.
SUPPORTED_HELP_QUERIES = ['use howdoi', 'howdoi', 'run howdoi', 'setup howdoi',
                          'do howdoi', 'howdoi howdoi', 'howdoi use howdoi']

NO_RESULTS_MESSAGE = "Sorry, couldn't find any help with that topic"

# variables for text formatting, prepend to string to begin text formatting.
BOLD = '\033[1m'
GREEN = '\033[92m'
RED = '\033[91m'
UNDERLINE = '\033[4m'
END_FORMAT = '\033[0m'  # append to string to end text formatting.

# stash options (these are also the keys of the parsed argument dict)
STASH_SAVE = 'save'
STASH_VIEW = 'view'
STASH_REMOVE = 'remove'
STASH_EMPTY = 'empty'

# Engines that raised BlockError so far in this process; howdoi() skips them
# when choosing an engine to retry with.
BLOCKED_ENGINES = []
  88. if os.getenv('HOWDOI_DISABLE_CACHE'):
  89. # works like an always empty cache
  90. cache = NullCache()
  91. else:
  92. cache = FileSystemCache(CACHE_DIR, CACHE_ENTRY_MAX, default_timeout=0)
  93. howdoi_session = requests.session()
  94. class BlockError(RuntimeError):
  95. pass
  96. class IntRange:
  97. def __init__(self, imin=None, imax=None):
  98. self.imin = imin
  99. self.imax = imax
  100. def __call__(self, arg):
  101. try:
  102. value = int(arg)
  103. except ValueError as value_error:
  104. raise self.exception() from value_error
  105. if (self.imin is not None and value < self.imin) or (self.imax is not None and value > self.imax):
  106. raise self.exception()
  107. return value
  108. def exception(self):
  109. if self.imin is not None and self.imax is not None:
  110. return argparse.ArgumentTypeError(f'Must be an integer in the range [{self.imin}, {self.imax}]')
  111. if self.imin is not None:
  112. return argparse.ArgumentTypeError(f'Must be an integer >= {self.imin}')
  113. if self.imax is not None:
  114. return argparse.ArgumentTypeError(f'Must be an integer <= {self.imax}')
  115. return argparse.ArgumentTypeError('Must be an integer')
  116. def _random_int(width):
  117. bres = os.urandom(width)
  118. if sys.version < '3':
  119. ires = int(bres.encode('hex'), 16)
  120. else:
  121. ires = int.from_bytes(bres, 'little')
  122. return ires
  123. def _random_choice(seq):
  124. return seq[_random_int(1) % len(seq)]
  125. def get_proxies():
  126. proxies = getproxies()
  127. filtered_proxies = {}
  128. for key, value in proxies.items():
  129. if key.startswith('http'):
  130. if not value.startswith('http'):
  131. filtered_proxies[key] = f'http://{value}'
  132. else:
  133. filtered_proxies[key] = value
  134. return filtered_proxies
  135. def _format_url_to_filename(url, file_ext='html'):
  136. filename = ''.join(ch for ch in url if ch.isalnum())
  137. return filename + '.' + file_ext
  138. def _get_result(url):
  139. try:
  140. resp = howdoi_session.get(url, headers={'User-Agent': _random_choice(USER_AGENTS)},
  141. proxies=get_proxies(),
  142. verify=VERIFY_SSL_CERTIFICATE,
  143. cookies={'CONSENT': 'YES+US.en+20170717-00-0'})
  144. resp.raise_for_status()
  145. return resp.text
  146. except requests.exceptions.SSLError as error:
  147. logging.error('%sEncountered an SSL Error. Try using HTTP instead of '
  148. 'HTTPS by setting the environment variable "HOWDOI_DISABLE_SSL".\n%s', RED, END_FORMAT)
  149. raise error
  150. def _get_from_cache(cache_key):
  151. # As of cachelib 0.3.0, it internally logging a warning on cache miss
  152. current_log_level = logging.getLogger().getEffectiveLevel()
  153. # Reduce the log level so the warning is not printed
  154. logging.getLogger().setLevel(logging.ERROR)
  155. page = cache.get(cache_key) # pylint: disable=assignment-from-none
  156. # Restore the log level
  157. logging.getLogger().setLevel(current_log_level)
  158. return page
  159. def _add_links_to_text(element):
  160. hyperlinks = element.find('a')
  161. for hyperlink in hyperlinks:
  162. pquery_object = pq(hyperlink)
  163. href = hyperlink.attrib['href']
  164. copy = pquery_object.text()
  165. if copy == href:
  166. replacement = copy
  167. else:
  168. replacement = f'[{copy}]({href})'
  169. pquery_object.replace_with(replacement)
  170. def get_text(element):
  171. ''' return inner text in pyquery element '''
  172. _add_links_to_text(element)
  173. try:
  174. return element.text(squash_space=False)
  175. except TypeError:
  176. return element.text()
  177. def _extract_links_from_bing(html):
  178. html.remove_namespaces()
  179. return [a.attrib['href'] for a in html('.b_algo')('h2')('a')]
  180. def _clean_google_link(link):
  181. if '/url?' in link:
  182. parsed_link = urlparse(link)
  183. query_params = parse_qs(parsed_link.query)
  184. url_params = query_params.get('q', []) or query_params.get('url', [])
  185. if url_params:
  186. return url_params[0]
  187. return link
  188. def _extract_links_from_google(query_object):
  189. html = query_object.html()
  190. link_pattern = re.compile(fr"https?://{URL}/questions/[0-9]*/[a-z0-9-]*")
  191. links = link_pattern.findall(html)
  192. links = [_clean_google_link(link) for link in links]
  193. return links
  194. def _extract_links_from_duckduckgo(html):
  195. html.remove_namespaces()
  196. links_anchors = html.find('a.result__a')
  197. results = []
  198. for anchor in links_anchors:
  199. link = anchor.attrib['href']
  200. url_obj = urlparse(link)
  201. parsed_url = parse_qs(url_obj.query).get('uddg', '')
  202. if parsed_url:
  203. results.append(parsed_url[0])
  204. return results
  205. def _extract_links(html, search_engine):
  206. if search_engine == 'bing':
  207. return _extract_links_from_bing(html)
  208. if search_engine == 'duckduckgo':
  209. return _extract_links_from_duckduckgo(html)
  210. return _extract_links_from_google(html)
  211. def _get_search_url(search_engine):
  212. return SEARCH_URLS.get(search_engine, SEARCH_URLS['google'])
  213. def _is_blocked(page):
  214. for indicator in BLOCK_INDICATORS:
  215. if page.find(indicator) != -1:
  216. return True
  217. return False
  218. def _get_links(query):
  219. search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google')
  220. search_url = _get_search_url(search_engine).format(URL, url_quote(query))
  221. logging.info('Searching %s with URL: %s', search_engine, search_url)
  222. try:
  223. result = _get_result(search_url)
  224. except requests.HTTPError:
  225. logging.info('Received HTTPError')
  226. result = None
  227. if not result or _is_blocked(result):
  228. logging.error('%sUnable to find an answer because the search engine temporarily blocked the request. '
  229. 'Attempting to use a different search engine.%s', RED, END_FORMAT)
  230. raise BlockError('Temporary block by search engine')
  231. html = pq(result)
  232. links = _extract_links(html, search_engine)
  233. if len(links) == 0:
  234. logging.info('Search engine %s found no StackOverflow links, returned HTML is:', search_engine)
  235. logging.info(result)
  236. return list(dict.fromkeys(links)) # remove any duplicates
  237. def get_link_at_pos(links, position):
  238. if not links:
  239. return False
  240. if len(links) >= position:
  241. link = links[position - 1]
  242. else:
  243. link = links[-1]
  244. return link
  245. def _format_output(args, code):
  246. if not args['color']:
  247. return code
  248. lexer = None
  249. # try to find a lexer using the StackOverflow tags
  250. # or the query arguments
  251. for keyword in args['query'].split() + args['tags']:
  252. try:
  253. lexer = get_lexer_by_name(keyword).name
  254. break
  255. except ClassNotFound:
  256. pass
  257. # no lexer found above, use the guesser
  258. if not lexer:
  259. try:
  260. lexer = guess_lexer(code).name
  261. except ClassNotFound:
  262. return code
  263. syntax = Syntax(code, lexer, background_color="default", line_numbers=False)
  264. console = Console(record=True)
  265. with console.capture() as capture:
  266. console.print(syntax)
  267. return capture.get()
  268. def _is_question(link):
  269. for fragment in BLOCKED_QUESTION_FRAGMENTS:
  270. if fragment in link:
  271. return False
  272. return re.search(r'questions/\d+/', link)
  273. def _get_questions(links):
  274. return [link for link in links if _is_question(link)]
  275. def _get_answer(args, link): # pylint: disable=too-many-branches
  276. cache_key = _get_cache_key(link)
  277. page = _get_from_cache(cache_key)
  278. if not page:
  279. logging.info('Fetching page: %s', link)
  280. page = _get_result(link + '?answertab=votes')
  281. cache.set(cache_key, page)
  282. else:
  283. logging.info('Using cached page: %s', link)
  284. html = pq(page)
  285. first_answer = html('.answercell').eq(0) or html('.answer').eq(0)
  286. instructions = first_answer.find('pre') or first_answer.find('code')
  287. args['tags'] = [t.text for t in html('.post-tag')]
  288. # make decision on answer body class.
  289. if first_answer.find(".js-post-body"):
  290. answer_body_cls = ".js-post-body"
  291. else:
  292. # rollback to post-text class
  293. answer_body_cls = ".post-text"
  294. if not instructions and not args['all']:
  295. logging.info('No code sample found, returning entire answer')
  296. text = get_text(first_answer.find(answer_body_cls).eq(0))
  297. elif args['all']:
  298. logging.info('Returning entire answer')
  299. texts = []
  300. for html_tag in first_answer.items(f'{answer_body_cls} > *'):
  301. current_text = get_text(html_tag)
  302. if current_text:
  303. if html_tag[0].tag in ['pre', 'code']:
  304. texts.append(_format_output(args, current_text))
  305. else:
  306. texts.append(current_text)
  307. text = '\n'.join(texts)
  308. else:
  309. text = _format_output(args, get_text(instructions.eq(0)))
  310. if text is None:
  311. logging.info('%sAnswer was empty%s', RED, END_FORMAT)
  312. text = NO_ANSWER_MSG
  313. text = text.strip()
  314. return text
  315. def _get_links_with_cache(query):
  316. cache_key = _get_cache_key(query)
  317. res = _get_from_cache(cache_key)
  318. if res:
  319. logging.info('Using cached links')
  320. if res == CACHE_EMPTY_VAL:
  321. logging.info('No StackOverflow links found in cached search engine results - will make live query')
  322. else:
  323. return res
  324. links = _get_links(query)
  325. if not links:
  326. cache.set(cache_key, CACHE_EMPTY_VAL)
  327. question_links = _get_questions(links)
  328. cache.set(cache_key, question_links or CACHE_EMPTY_VAL)
  329. return question_links
  330. def build_splitter(splitter_character='=', splitter_length=80):
  331. return '\n' + splitter_character * splitter_length + '\n\n'
  332. def _get_answers(args):
  333. """
  334. @args: command-line arguments
  335. returns: array of answers and their respective metadata
  336. False if unable to get answers
  337. """
  338. question_links = _get_links_with_cache(args['query'])
  339. if not question_links:
  340. return False
  341. initial_pos = args['pos'] - 1
  342. final_pos = initial_pos + args['num_answers']
  343. question_links = question_links[initial_pos:final_pos]
  344. search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google')
  345. logging.info('Links from %s found on %s: %s', URL, search_engine, len(question_links))
  346. logging.info('URL: %s', '\n '.join(question_links))
  347. logging.info('Answers requested: %s, Starting at position: %s', args["num_answers"], args['pos'])
  348. with Pool() as pool:
  349. answers = pool.starmap(
  350. _get_answer_worker,
  351. [(args, link) for link in question_links]
  352. )
  353. answers = [a for a in answers if a.get('answer')]
  354. for i, answer in enumerate(answers, 1):
  355. answer['position'] = i
  356. logging.info('Total answers returned: %s', len(answers))
  357. return answers or False
  358. def _get_answer_worker(args, link):
  359. answer = _get_answer(args, link)
  360. result = {
  361. 'answer': None,
  362. 'link': None,
  363. 'position': None
  364. }
  365. multiple_answers = (args['num_answers'] > 1 or args['all'])
  366. if not answer:
  367. return result
  368. if not args['link'] and not args['json_output'] and multiple_answers:
  369. answer = ANSWER_HEADER.format(link, answer, STAR_HEADER)
  370. answer += '\n'
  371. result['answer'] = answer
  372. result['link'] = link
  373. return result
  374. def _clear_cache():
  375. global cache # pylint: disable=global-statement,invalid-name
  376. if not cache:
  377. cache = FileSystemCache(CACHE_DIR, CACHE_ENTRY_MAX, 0)
  378. return cache.clear()
  379. def _is_help_query(query):
  380. return any(query.lower() == help_query for help_query in SUPPORTED_HELP_QUERIES)
  381. def _format_answers(args, res):
  382. if "error" in res:
  383. return f'ERROR: {RED}{res["error"]}{END_FORMAT}'
  384. if args["json_output"]:
  385. return json.dumps(res)
  386. formatted_answers = []
  387. for answer in res:
  388. next_ans = answer["answer"]
  389. if args["link"]: # if we only want links
  390. next_ans = answer["link"]
  391. formatted_answers.append(next_ans or NO_RESULTS_MESSAGE)
  392. return build_splitter().join(formatted_answers)
  393. def _get_help_instructions():
  394. instruction_splitter = build_splitter(' ', 60)
  395. query = 'print hello world in python'
  396. instructions = [
  397. 'Here are a few popular howdoi commands ',
  398. '>>> howdoi {} (default query)',
  399. '>>> howdoi {} -a (read entire answer)',
  400. '>>> howdoi {} -n [number] (retrieve n number of answers)',
  401. '>>> howdoi {} -l (display only a link to where the answer is from',
  402. '>>> howdoi {} -c (Add colors to the output)',
  403. '>>> howdoi {} -e (Specify the search engine you want to use e.g google,bing)'
  404. ]
  405. instructions = map(lambda s: s.format(query), instructions)
  406. return instruction_splitter.join(instructions)
def _get_cache_key(args):
    """Return a cache key made of the caller's function name, ``str(args)`` and the howdoi version.

    Because the caller's name is part of the key, the same *args* produce
    different keys when passed from different functions.
    """
    frame = inspect.currentframe()
    # Index 0 of the outer frames is this function; index 1 is its direct caller.
    calling_func = inspect.getouterframes(frame)[1].function
    return calling_func + str(args) + __version__
  411. def format_stash_item(fields, index=-1):
  412. title = fields['alias']
  413. description = fields['desc']
  414. item_num = index + 1
  415. if index == -1:
  416. return f'{UNDERLINE}{BOLD}$ {title}{END_FORMAT}\n\n{description}\n'
  417. return f'{UNDERLINE}{BOLD}$ [{item_num}] {title}{END_FORMAT}\n\n{description}\n'
  418. def print_stash(stash_list=None):
  419. if not stash_list or len(stash_list) == 0:
  420. stash_list = ['\nSTASH LIST:']
  421. commands = keep_utils.read_commands()
  422. if commands is None or len(commands.items()) == 0:
  423. logging.error('%sNo commands found in stash. '
  424. 'Add a command with "howdoi --%s <query>".%s', RED, STASH_SAVE, END_FORMAT)
  425. return
  426. for _, fields in commands.items():
  427. stash_list.append(format_stash_item(fields))
  428. else:
  429. stash_list = [format_stash_item(x['fields'], i) for i, x in enumerate(stash_list)]
  430. print(build_splitter('#').join(stash_list))
  431. def _get_stash_key(args):
  432. stash_args = {}
  433. ignore_keys = [STASH_SAVE, STASH_VIEW, STASH_REMOVE, STASH_EMPTY, 'tags'] # ignore these for stash key
  434. for key in args:
  435. if key not in ignore_keys:
  436. stash_args[key] = args[key]
  437. return str(stash_args)
  438. def _stash_remove(cmd_key, title):
  439. commands = keep_utils.read_commands()
  440. if commands is not None and cmd_key in commands:
  441. keep_utils.remove_command(cmd_key)
  442. print(f'\n{BOLD}{GREEN}"{title}" removed from stash{END_FORMAT}\n')
  443. else:
  444. print(f'\n{BOLD}{RED}"{title}" not found in stash{END_FORMAT}\n')
  445. def _stash_save(cmd_key, title, answer):
  446. try:
  447. keep_utils.save_command(cmd_key, answer, title)
  448. except FileNotFoundError:
  449. os.system('keep init')
  450. keep_utils.save_command(cmd_key, answer, title)
  451. finally:
  452. print_stash()
  453. def _parse_cmd(args, res):
  454. answer = _format_answers(args, res)
  455. cmd_key = _get_stash_key(args)
  456. title = ''.join(args['query'])
  457. if args[STASH_SAVE]:
  458. _stash_save(cmd_key, title, answer)
  459. return ''
  460. if args[STASH_REMOVE]:
  461. _stash_remove(cmd_key, title)
  462. return ''
  463. return answer
def howdoi(raw_query):
    """Answer a query and return the text to display.

    *raw_query* is either a command-line string (split on single spaces and
    parsed with the argument parser) or an already parsed argument dict.
    The chosen engine is written to the HOWDOI_SEARCH_ENGINE environment
    variable. Results, including "no results" errors, are cached under a
    key built from the arguments. If the engine blocks the request, the
    call is retried with the next supported engine that has not blocked yet.
    """
    if isinstance(raw_query, str):  # you can pass either a raw or a parsed query
        parser = get_parser()
        args = vars(parser.parse_args(raw_query.split(' ')))
    else:
        args = raw_query

    # Precedence: explicit argument, then environment variable, then google.
    search_engine = args['search_engine'] or os.getenv('HOWDOI_SEARCH_ENGINE') or 'google'
    os.environ['HOWDOI_SEARCH_ENGINE'] = search_engine
    if search_engine not in SUPPORTED_SEARCH_ENGINES:
        supported_search_engines = ', '.join(SUPPORTED_SEARCH_ENGINES)
        message = f'Unsupported engine {search_engine}. The supported engines are: {supported_search_engines}'
        res = {'error': message}
        return _parse_cmd(args, res)

    # args['query'] arrives as a list of words; join it and drop question marks.
    args['query'] = ' '.join(args['query']).replace('?', '')
    # _get_cache_key uses its caller's name, so it is called directly here.
    cache_key = _get_cache_key(args)

    if _is_help_query(args['query']):
        return _get_help_instructions() + '\n'

    res = _get_from_cache(cache_key)

    if res:
        logging.info('Using cached response (add -C to clear the cache)')
        return _parse_cmd(args, res)

    logging.info('Fetching answers for query: %s', args["query"])

    try:
        res = _get_answers(args)
        if not res:
            message = NO_RESULTS_MESSAGE
            if not args['explain']:
                message = f'{message} (use --explain to learn why)'
            res = {'error': message}
        # Both real answers and the "no results" error are cached.
        cache.set(cache_key, res)
    except (RequestsConnectionError, SSLError):
        res = {'error': f'Unable to reach {search_engine}. Do you need to use a proxy?\n'}
    except BlockError:
        # Remember the blocked engine and retry with the first one not blocked yet.
        BLOCKED_ENGINES.append(search_engine)
        next_engine = next((engine for engine in SUPPORTED_SEARCH_ENGINES if engine not in BLOCKED_ENGINES), None)
        if next_engine is None:
            res = {'error': 'Unable to get a response from any search engine\n'}
        else:
            args['search_engine'] = next_engine
            # The recursive call joins the query again, so turn it back into a list.
            args['query'] = args['query'].split()
            logging.info('%sRetrying search with %s%s', GREEN, next_engine, END_FORMAT)
            return howdoi(args)
    return _parse_cmd(args, res)
def get_parser():
    """Build and return the ArgumentParser for the howdoi command line.

    Options registered with ``help=argparse.SUPPRESS`` are accepted but not
    shown in the help output.
    """
    parser = argparse.ArgumentParser(description='instant coding answers via the command line',
                                     epilog=textwrap.dedent('''\
                                     environment variable examples:
                                     HOWDOI_COLORIZE=1
                                     HOWDOI_DISABLE_CACHE=1
                                     HOWDOI_DISABLE_SSL=1
                                     HOWDOI_SEARCH_ENGINE=google
                                     HOWDOI_URL=serverfault.com
                                     '''),
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('query', metavar='QUERY', type=str, nargs='*', help='the question to answer')
    # Both --pos and --num accept integers from 1 to 20 inclusive.
    parser.add_argument('-p', '--pos', help='select answer in specified position (default: 1)',
                        default=1, type=IntRange(1, 20), metavar='POS')
    parser.add_argument('-n', '--num', help='number of answers to return (default: 1)',
                        dest='num_answers', default=1, type=IntRange(1, 20), metavar='NUM')
    parser.add_argument('--num-answers', help=argparse.SUPPRESS)
    parser.add_argument('-a', '--all', help='display the full text of the answer', action='store_true')
    parser.add_argument('-l', '--link', help='display only the answer link', action='store_true')
    parser.add_argument('-c', '--color', help='enable colorized output', action='store_true')
    parser.add_argument('-x', '--explain', help='explain how answer was chosen', action='store_true')
    parser.add_argument('-C', '--clear-cache', help='clear the cache',
                        action='store_true')
    parser.add_argument('-j', '--json', help='return answers in raw json format', dest='json_output',
                        action='store_true')
    parser.add_argument('--json-output', action='store_true', help=argparse.SUPPRESS)
    parser.add_argument('-v', '--version', help='display the current version of howdoi',
                        action='store_true')
    parser.add_argument('-e', '--engine', help='search engine for this query (google, bing, duckduckgo)',
                        dest='search_engine', nargs="?", metavar='ENGINE')
    # Stash options; their destinations match the STASH_* constants.
    parser.add_argument('--save', '--stash', help='stash a howdoi answer',
                        action='store_true')
    parser.add_argument('--view', help='view your stash',
                        action='store_true')
    parser.add_argument('--remove', help='remove an entry in your stash',
                        action='store_true')
    parser.add_argument('--empty', help='empty your stash',
                        action='store_true')
    parser.add_argument('--sanity-check', help=argparse.SUPPRESS,
                        action='store_true')
    return parser
  548. def _sanity_check(engine, test_query=None):
  549. parser = get_parser()
  550. if not test_query:
  551. test_query = 'format date bash'
  552. args = vars(parser.parse_args(test_query.split()))
  553. args['search_engine'] = engine
  554. try:
  555. result = howdoi(args)
  556. # Perhaps better to use `-j` and then check for an error message
  557. # rather than trying to enumerate all the error strings
  558. assert "Sorry" not in result and "Unable to" not in result
  559. except AssertionError as exc:
  560. if engine == 'google':
  561. raise GoogleValidationError from exc
  562. if engine == 'bing':
  563. raise BingValidationError from exc
  564. raise DDGValidationError from exc
  565. def prompt_stash_remove(args, stash_list, view_stash=True):
  566. if view_stash:
  567. print_stash(stash_list)
  568. last_index = len(stash_list)
  569. prompt = f'{BOLD}> Select a stash command to remove [1-{last_index}] (0 to cancel): {END_FORMAT}'
  570. user_input = input(prompt)
  571. try:
  572. user_input = int(user_input)
  573. if user_input == 0:
  574. return
  575. if user_input < 1 or user_input > last_index:
  576. logging.error('\n%sInput index is invalid.%s', RED, END_FORMAT)
  577. prompt_stash_remove(args, stash_list, False)
  578. return
  579. cmd = stash_list[user_input - 1]
  580. cmd_key = cmd['command']
  581. cmd_name = cmd['fields']['alias']
  582. _stash_remove(cmd_key, cmd_name)
  583. return
  584. except ValueError:
  585. logging.error('\n%sInvalid input. Must specify index of command.%s', RED, END_FORMAT)
  586. prompt_stash_remove(args, stash_list, False)
  587. return
  588. def perform_sanity_check():
  589. '''Perform sanity check.
  590. Returns exit code for program. An exit code of -1 means a validation error was encountered.
  591. '''
  592. global cache # pylint: disable=global-statement,invalid-name
  593. # Disable cache to avoid cached answers while performing the checks
  594. cache = NullCache()
  595. exit_code = 0
  596. for engine in ['google']: # 'bing' and 'duckduckgo' throw various block errors
  597. print(f'Checking {engine}...')
  598. try:
  599. _sanity_check(engine)
  600. except (GoogleValidationError, BingValidationError, DDGValidationError):
  601. logging.error('%s%s query failed%s', RED, engine, END_FORMAT)
  602. exit_code = -1
  603. if exit_code == 0:
  604. print(f'{GREEN}Ok{END_FORMAT}')
  605. return exit_code
def command_line_runner():  # pylint: disable=too-many-return-statements,too-many-branches
    """Entry point for the command line: parse arguments and dispatch.

    Handles, in order: --version, --explain (enables INFO logging),
    --sanity-check (exits the process), --clear-cache, the stash options,
    and finally the query itself, whose result is written to stdout.
    """
    parser = get_parser()
    args = vars(parser.parse_args())

    if args['version']:
        print(__version__)
        return

    if args['explain']:
        logging.getLogger().setLevel(logging.INFO)
        logging.info('Version: %s', __version__)

    if args['sanity_check']:
        sys.exit(
            perform_sanity_check()
        )

    # Clearing the cache does not return; a query given alongside it still runs.
    if args['clear_cache']:
        if _clear_cache():
            print(f'{GREEN}Cache cleared successfully{END_FORMAT}')
        else:
            logging.error('%sClearing cache failed%s', RED, END_FORMAT)

    if args[STASH_VIEW]:
        print_stash()
        return

    if args[STASH_EMPTY]:
        os.system('keep init')
        return

    # --remove without a query: let the user pick the entry interactively.
    if args[STASH_REMOVE] and len(args['query']) == 0:
        commands = keep_utils.read_commands()
        if commands is None or len(commands.items()) == 0:
            logging.error('%sNo commands found in stash. '
                          'Add a command with "howdoi --%s <query>".%s', RED, STASH_SAVE, END_FORMAT)
            return
        stash_list = [{'command': cmd, 'fields': field} for cmd, field in commands.items()]
        prompt_stash_remove(args, stash_list)
        return

    if not args['query']:
        parser.print_help()
        return

    if os.getenv('HOWDOI_COLORIZE'):
        args['color'] = True

    howdoi_result = howdoi(args)

    if os.name == 'nt':
        # Windows
        print(howdoi_result)
    else:
        utf8_result = howdoi_result.encode('utf-8', 'ignore')
        # Write UTF-8 to stdout: https://stackoverflow.com/a/3603160
        sys.stdout.buffer.write(utf8_result)

    # close the session to release connection
    # (only reached on this path; the early returns above skip it)
    howdoi_session.close()
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    command_line_runner()