From e83fd89c8a76f069f1fd57db407c570785ca2ced Mon Sep 17 00:00:00 2001 From: Igor Chubin Date: Sun, 17 Feb 2019 14:12:08 +0100 Subject: [PATCH] routing refactoring (get_answer.py => routing.py + postprocessing.py) --- lib/adapter/adapter.py | 2 +- lib/adapter/internal.py | 13 +++ lib/adapter/learnxiny.py | 3 + lib/adapter/question.py | 2 + lib/adapter/rosetta.py | 2 +- lib/cheat_wrapper.py | 41 ++++++- lib/routing.py | 246 ++++++++++----------------------------- 7 files changed, 120 insertions(+), 189 deletions(-) diff --git a/lib/adapter/adapter.py b/lib/adapter/adapter.py index 259fb2b..69957dc 100644 --- a/lib/adapter/adapter.py +++ b/lib/adapter/adapter.py @@ -21,7 +21,7 @@ class Adapter(object): if prefix in self._list: return self._list[prefix] - self._list[prefix] = self._get_list(prefix=prefix) + self._list[prefix] = set(self._get_list(prefix=prefix)) return self._list[prefix] def is_found(self, topic): diff --git a/lib/adapter/internal.py b/lib/adapter/internal.py index 6fbb486..f3f9c74 100644 --- a/lib/adapter/internal.py +++ b/lib/adapter/internal.py @@ -124,3 +124,16 @@ Do you mean one of these topics maybe? %s """ % possible_topics_text + +class Search(Adapter): + + _adapter_name = 'search' + _output_format = 'text' + _cache_needed = False + + @staticmethod + def get_list(prefix=None): + return [] + + def is_found(topic): + return True diff --git a/lib/adapter/learnxiny.py b/lib/adapter/learnxiny.py index c34e33c..2286325 100644 --- a/lib/adapter/learnxiny.py +++ b/lib/adapter/learnxiny.py @@ -825,6 +825,9 @@ class LearnXinY(Adapter): Return whether `topic` is a valid learnxiny topic """ + if '/' not in topic: + return False + lang, topic = topic.split('/', 1) if lang not in self.adapters: return False diff --git a/lib/adapter/question.py b/lib/adapter/question.py index c98d0fb..823370a 100644 --- a/lib/adapter/question.py +++ b/lib/adapter/question.py @@ -32,8 +32,10 @@ class Question(Adapter): section_name, topic = topic.split('/', 1) if ':' in section_name: _, section_name = section_name.split(':', 1) + section_name = SO_NAME.get(section_name, section_name) topic = "%s/%s" % (section_name, topic) + # some clients send queries with - instead of + so we have to rewrite them to topic = re.sub(r"(? emacs:go-mode + """ + + if '/' not in query: + return query + + section_name, rest = query.split('/', 1) + + if ':' in section_name: + section_name = rewrite_editor_section_name(section_name) + section_name = LANGUAGE_ALIAS.get(section_name, section_name) + + return "%s/%s" % (section_name, rest) + def _sanitize_query(query): return re.sub('[<>"]', '', query) @@ -48,6 +74,9 @@ def cheat_wrapper(query, request_options=None, output_format='ansi'): return topic, keyword, search_options query = _sanitize_query(query) + query = _rewrite_aliases(query) + query = _rewrite_section_name(query) + # at the moment, we just remove trailing slashes # so queries python/ and python are equal @@ -55,10 +84,16 @@ def cheat_wrapper(query, request_options=None, output_format='ansi'): topic, keyword, search_options = _parse_query(query) if keyword: - answers = find_answer_by_keyword( + answers = find_answers_by_keyword( topic, keyword, options=search_options, request_options=request_options) else: - answers = [get_answer(topic, keyword, request_options=request_options)] + answers = [get_answer_dict(topic, request_options=request_options)] + + answers = [ + postprocessing.postprocess( + answer, keyword, search_options, request_options=request_options) + for answer in answers + ] answer_data = { 'query': query, diff --git a/lib/routing.py b/lib/routing.py index 67f1ee9..ebc70a8 100644 --- a/lib/routing.py +++ b/lib/routing.py @@ -1,22 +1,14 @@ """ -Main module, answers hub. +Queries routing and caching. Exports: get_topics_list() - get_answer() - find_answer_by_keyword() + get_answer_dict() """ from __future__ import print_function -import os import re -import redis - -from globals import REDISHOST, MAX_SEARCH_LEN -from languages_data import LANGUAGE_ALIAS, SO_NAME, rewrite_editor_section_name - -import fmt.comments import cache import adapter.cheat_sheets @@ -30,17 +22,29 @@ import adapter.rosetta class Router(object): """ - Implementation of query routing. - Routing is done basing on the data exported by the adapters. - (mainly by functions get_list() and is_found()). + Implementation of query routing. Routing is based on `routing_table` + and the data exported by the adapters (functions `get_list()` and `is_found()`). - Function get_topics_list() returns available topics - (that are accessible at /:list). - - Function get_topic_type() delivers name of the adapter, - that will process the query. + `get_topics_list()` returns available topics (accessible at /:list). + `get_answer_dict()` return answer for the query. """ + routing_table = [ + ("^$", "search"), + ("^[^/]*/rosetta(/|$)", "rosetta"), + ("^:", "internal"), + ("/:list$", "internal"), + ("/$", "cheat.sheets dir"), + ("", "cheat.sheets"), + ("", "cheat"), + ("", "tldr"), + ("", "late.nz"), + ("", "fosdem"), + ("^[^/]*$", "unknown"), + ("", "learnxiny"), + ("^[a-z][a-z]-[a-z][a-z]$", "translation"), + ] + def __init__(self): self._cached_topics_list = [] @@ -53,6 +57,7 @@ class Router(object): "unknown": adapter.internal.UnknownPages( get_topic_type=self.get_topic_type, get_topics_list=self.get_topics_list), + "search": adapter.internal.Search(), "tldr": adapter.cmd.Tldr(), "cheat": adapter.cmd.Cheat(), "fosdem": adapter.cmd.Fosdem(), @@ -99,37 +104,20 @@ class Router(object): def __get_topic_type(topic): - routing_table = [ - ("^$", "search"), - ("^[^/]*/rosetta(/|$)", "rosetta"), - ("^:", "internal"), - ("/:list$", "internal"), - ("/$", "cheat.sheets dir"), - ("", "cheat.sheets"), - ("", "cheat"), - ("", "tldr"), - ("", "late.nz"), - ("", "fosdem"), - ("^[/]*$", "unknown"), - ("", "learnxiny"), - ("^[a-z][a-z]-[a-z][a-z]$", "translation"), - ] - - for regexp, route in routing_table: + for regexp, route in self.routing_table: if re.search(regexp, topic): if route in self._adapter: if self._adapter[route].is_found(topic): return route else: return route - return 'question' if topic not in self._cached_topic_type: self._cached_topic_type[topic] = __get_topic_type(topic) return self._cached_topic_type[topic] - def get_page_dict(self, query, request_options=None): + def _get_page_dict(self, query, request_options=None): """ Return answer_dict for the `query`. """ @@ -138,162 +126,52 @@ class Router(object): return self._adapter[topic_type]\ .get_page_dict(query, request_options=request_options) -if os.environ.get('REDIS_HOST', '').lower() != 'none': - REDIS = redis.StrictRedis(host=REDISHOST, port=6379, db=0) -else: - REDIS = None - -_ROUTER = Router() -get_topics_list = _ROUTER.get_topics_list - -def get_answer(topic, keyword, options="", request_options=None): # pylint: disable=too-many-locals,too-many-branches,too-many-statements - """ - Find cheat sheet for the topic. - If `keyword` is None or rempty, return the whole answer. - Otherwise cut the paragraphs containing keywords. - - Args: - topic (str): the name of the topic of the cheat sheet - keyword (str): the name of the keywords to search in the cheat sheets - - Returns: - string: the cheat sheet - """ - - def _join_paragraphs(paragraphs): - answer = "\n".join(paragraphs) - return answer - - def _split_paragraphs(text): - answer = [] - paragraph = "" - for line in text.splitlines(): - if line == "": - answer.append(paragraph) - paragraph = "" - else: - paragraph += line+"\n" - answer.append(paragraph) - return answer - - def _paragraph_contains(paragraph, keyword, insensitive=False, word_boundaries=True): + def get_answer_dict(self, topic, request_options=None): """ - Check if `paragraph` contains `keyword`. - Several keywords can be joined together using ~ - For example: ~ssh~passphrase + Find cheat sheet for the topic. + + Args: + `topic` (str): the name of the topic of the cheat sheet + + Returns: + answer_dict: the answer dictionary """ - answer = True - if '~' in keyword: - keywords = keyword.split('~') - else: - keywords = [keyword] + topic_type = self.get_topic_type(topic) - for kwrd in keywords: - regex = re.escape(kwrd) - if not word_boundaries: - regex = r"\b%s\b" % kwrd - - if insensitive: - answer = answer and bool(re.search(regex, paragraph, re.IGNORECASE)) - else: - answer = answer and bool(re.search(regex, paragraph)) - - return answer - - def _rewrite_aliases(word): - if word == ':bash.completion': - return ':bash_completion' - return word - - def _rewrite_section_name(query): - """ - """ - if '/' not in query: - return query - - section_name, rest = query.split('/', 1) - - if ':' in section_name: - # if ':' is in section_name, it means, that we want to - # translate the answer in the specified human language - # (experimental) - language, section_name = section_name.split(':', 1) - else: - language = "" - - section_name = LANGUAGE_ALIAS.get(section_name, section_name) - - if language: - section_name = language + ":" + section_name - - return "%s/%s" % (section_name, rest) - - def _rewrite_section_name_for_q(query): - """ - FIXME: we rewrite the section name too earlier, - what means that we have to use SO names everywhere, - where actually canonified internal names shoud be used. - After this thing is fixed, we should: - * fix naming in cache - * fix VIM_NAMES - """ - if '/' not in query: - return query - - section_name, rest = query.split('/', 1) - if ':' in section_name: - section_name = rewrite_editor_section_name(section_name) - - section_name = SO_NAME.get(section_name, section_name) - return "%s/%s" % (section_name, rest) - - - answer = None - needs_beautification = False - - topic = _rewrite_aliases(topic) - topic = _rewrite_section_name(topic) - - # This is pretty unoptimal, so this part should be rewritten. - # For the most queries we could say immediately, # what type the query has. - topic_type = _ROUTER.get_topic_type(topic) - - # Checking if the answer is in the cache - if topic != "": - # Temporary hack for "questions": # the topic name has to be prefixed with `q:` - # so we can later delete them from REDIS. - # And we known that they need beautification + # 'question' queries are pretty expensive, that's why they should be handled + # in a special way: + # we do not drop the old style cache entries and try to reuse them if possible if topic_type == 'question': - topic = _rewrite_section_name_for_q(topic) - topic = "q:" + topic - needs_beautification = True + answer = cache.get('q:' + topic) + if answer: + if isinstance(answer, dict): + return answer + return { + 'topic': topic, + 'topic_type': 'question', + 'answer': answer, + 'format': 'text+code', + } - if REDIS: - answer = REDIS.get(topic) - if answer: - answer = answer.decode('utf-8') + answer = self._get_page_dict(topic, request_options=request_options) + cache.put('q:' + topic, answer) + return answer - # If answer was not found in the cache, try to find it in one of the repositories - if not answer: - answer = _ROUTER.get_page_dict(topic, request_options=request_options) + # Try to find cacheable queries in the cache. + # If answer was not found in the cache, resolve it in a normal way and save in the cache + cache_needed = self._adapter[topic_type].is_cache_needed() + if cache_needed: + answer = cache.get(topic) + if not isinstance(answer, dict): + answer = None + if answer: + return answer - # saving answers in the cache - if REDIS: - if answer and answer['topic_type'] not in ["search", "internal", "unknown"]: - REDIS.set(topic, answer) + answer = self._get_page_dict(topic, request_options=request_options) - if needs_beautification: - filetype = 'bash' - if '/' in topic: - filetype = topic.split('/', 1)[0] - if filetype.startswith('q:'): - filetype = filetype[2:] - - answer['answer'] = fmt.comments.beautify( - answer['answer'].encode('utf-8'), filetype, request_options) - - if not keyword: + if cache_needed and answer: + cache.put(topic, answer) return answer # pylint: disable=invalid-name