From e83fd89c8a76f069f1fd57db407c570785ca2ced Mon Sep 17 00:00:00 2001
From: Igor Chubin <igor@chub.in>
Date: Sun, 17 Feb 2019 14:12:08 +0100
Subject: [PATCH] routing refactoring (get_answer.py => routing.py +
 postprocessing.py)

---
 lib/adapter/adapter.py   |   2 +-
 lib/adapter/internal.py  |  13 +++
 lib/adapter/learnxiny.py |   3 +
 lib/adapter/question.py  |   2 +
 lib/adapter/rosetta.py   |   2 +-
 lib/cheat_wrapper.py     |  41 ++++++-
 lib/routing.py           | 246 ++++++++++-----------------------------
 7 files changed, 120 insertions(+), 189 deletions(-)

diff --git a/lib/adapter/adapter.py b/lib/adapter/adapter.py
index 259fb2b..69957dc 100644
--- a/lib/adapter/adapter.py
+++ b/lib/adapter/adapter.py
@@ -21,7 +21,7 @@ class Adapter(object):
         if prefix in self._list:
             return self._list[prefix]
 
-        self._list[prefix] = self._get_list(prefix=prefix)
+        self._list[prefix] = set(self._get_list(prefix=prefix))
         return self._list[prefix]
 
     def is_found(self, topic):
diff --git a/lib/adapter/internal.py b/lib/adapter/internal.py
index 6fbb486..f3f9c74 100644
--- a/lib/adapter/internal.py
+++ b/lib/adapter/internal.py
@@ -124,3 +124,16 @@ Do you mean one of these topics maybe?
 
 %s
     """ % possible_topics_text
+
+class Search(Adapter):
+    """Adapter behind the 'search' route: lists no topics, accepts any topic."""
+    _adapter_name = 'search'
+    _output_format = 'text'
+    _cache_needed = False
+
+    @staticmethod
+    def get_list(prefix=None):
+        return []
+
+    def is_found(self, topic):  # `self` is required: the router calls it on an instance
+        return True
diff --git a/lib/adapter/learnxiny.py b/lib/adapter/learnxiny.py
index c34e33c..2286325 100644
--- a/lib/adapter/learnxiny.py
+++ b/lib/adapter/learnxiny.py
@@ -825,6 +825,9 @@ class LearnXinY(Adapter):
         Return whether `topic` is a valid learnxiny topic
         """
 
+        if '/' not in topic:
+            return False
+
         lang, topic = topic.split('/', 1)
         if lang not in self.adapters:
             return False
diff --git a/lib/adapter/question.py b/lib/adapter/question.py
index c98d0fb..823370a 100644
--- a/lib/adapter/question.py
+++ b/lib/adapter/question.py
@@ -32,8 +32,10 @@ class Question(Adapter):
             section_name, topic = topic.split('/', 1)
             if ':' in section_name:
                 _, section_name = section_name.split(':', 1)
+            section_name = SO_NAME.get(section_name, section_name)
             topic = "%s/%s" % (section_name, topic)
 
+
         # some clients send queries with - instead of + so we have to rewrite them to
         topic = re.sub(r"(?<!-)-", ' ', topic)
 
diff --git a/lib/adapter/rosetta.py b/lib/adapter/rosetta.py
index 2e7eda9..b56b0ea 100644
--- a/lib/adapter/rosetta.py
+++ b/lib/adapter/rosetta.py
@@ -23,7 +23,7 @@ class Rosetta(Adapter):
     Adapter for RosettaCode
     """
 
-    __section_name = 'rosetta'
+    __section_name = "rosetta"
     _adapter_name = "rosetta"
     _output_format = "code"
 
diff --git a/lib/cheat_wrapper.py b/lib/cheat_wrapper.py
index 02932e4..9f66efb 100644
--- a/lib/cheat_wrapper.py
+++ b/lib/cheat_wrapper.py
@@ -11,7 +11,11 @@ Exports:
 import re
 import json
 
-from get_answer import get_answer, find_answer_by_keyword, get_topics_list
+from routing import get_answer_dict, get_topics_list
+from search import find_answers_by_keyword
+from languages_data import LANGUAGE_ALIAS, rewrite_editor_section_name
+import postprocessing
+
 import frontend.html
 import frontend.ansi
 
@@ -22,6 +26,28 @@ def cheat_wrapper(query, request_options=None, output_format='ansi'):
     Additional request options specified in `request_options`.
     """
 
+    def _rewrite_aliases(word):
+        # whole-query aliases; anything not listed here is returned untouched
+        known_aliases = {':bash.completion': ':bash_completion'}
+        return known_aliases.get(word, word)
+
+    def _rewrite_section_name(query):
+        """
+        Rewrite the section name, i.e. the part of `query` before the first '/':
+        * a section containing ':' (EDITOR:NAME, e.g. emacs:go-mode) is passed
+          to rewrite_editor_section_name(); the result is looked up in LANGUAGE_ALIAS
+        * a query without '/' is returned unchanged
+        """
+
+        section, slash, remainder = query.partition('/')
+        if not slash:
+            return query
+
+        if ':' in section:
+            section = rewrite_editor_section_name(section)
+
+        return "%s/%s" % (LANGUAGE_ALIAS.get(section, section), remainder)
+
     def _sanitize_query(query):
         return re.sub('[<>"]', '', query)
 
@@ -48,6 +74,9 @@ def cheat_wrapper(query, request_options=None, output_format='ansi'):
         return topic, keyword, search_options
 
     query = _sanitize_query(query)
+    query = _rewrite_aliases(query)
+    query = _rewrite_section_name(query)
+
 
     # at the moment, we just remove trailing slashes
     # so queries python/ and python are equal
@@ -55,10 +84,16 @@ def cheat_wrapper(query, request_options=None, output_format='ansi'):
     topic, keyword, search_options = _parse_query(query)
 
     if keyword:
-        answers = find_answer_by_keyword(
+        answers = find_answers_by_keyword(
             topic, keyword, options=search_options, request_options=request_options)
     else:
-        answers = [get_answer(topic, keyword, request_options=request_options)]
+        answers = [get_answer_dict(topic, request_options=request_options)]
+
+    answers = [
+        postprocessing.postprocess(
+            answer, keyword, search_options, request_options=request_options)
+        for answer in answers
+    ]
 
     answer_data = {
         'query': query,
diff --git a/lib/routing.py b/lib/routing.py
index 67f1ee9..ebc70a8 100644
--- a/lib/routing.py
+++ b/lib/routing.py
@@ -1,22 +1,14 @@
 """
-Main module, answers hub.
+Queries routing and caching.
 
 Exports:
 
     get_topics_list()
-    get_answer()
-    find_answer_by_keyword()
+    get_answer_dict()
 """
 from __future__ import print_function
 
-import os
 import re
-import redis
-
-from globals import REDISHOST, MAX_SEARCH_LEN
-from languages_data import LANGUAGE_ALIAS, SO_NAME, rewrite_editor_section_name
-
-import fmt.comments
 
 import cache
 import adapter.cheat_sheets
@@ -30,17 +22,29 @@ import adapter.rosetta
 class Router(object):
 
     """
-    Implementation of query routing.
-    Routing is done basing on the data exported by the adapters.
-    (mainly by functions get_list() and is_found()).
+    Implementation of query routing. Routing is based on `routing_table`
+    and the data exported by the adapters (functions `get_list()` and `is_found()`).
 
-    Function get_topics_list() returns available topics
-    (that are accessible at /:list).
-
-    Function get_topic_type() delivers name of the adapter,
-    that will process the query.
+    `get_topics_list()` returns available topics (accessible at /:list).
+    `get_answer_dict()` returns the answer dictionary for the query.
     """
 
+    routing_table = [  # (regexp, route) pairs, tried in this order by get_topic_type()
+        ("^$", "search"),  # empty topic
+        ("^[^/]*/rosetta(/|$)", "rosetta"),
+        ("^:", "internal"),
+        ("/:list$", "internal"),
+        ("/$", "cheat.sheets dir"),
+        ("", "cheat.sheets"),  # "" matches every topic, so the order of these rows matters
+        ("", "cheat"),
+        ("", "tldr"),
+        ("", "late.nz"),
+        ("", "fosdem"),
+        ("^[^/]*$", "unknown"),  # topics without '/'
+        ("", "learnxiny"),
+        ("^[a-z][a-z]-[a-z][a-z]$", "translation"),
+    ]
+
     def __init__(self):
 
         self._cached_topics_list = []
@@ -53,6 +57,7 @@ class Router(object):
             "unknown": adapter.internal.UnknownPages(
                 get_topic_type=self.get_topic_type,
                 get_topics_list=self.get_topics_list),
+            "search": adapter.internal.Search(),
             "tldr": adapter.cmd.Tldr(),
             "cheat": adapter.cmd.Cheat(),
             "fosdem": adapter.cmd.Fosdem(),
@@ -99,37 +104,20 @@ class Router(object):
 
         def __get_topic_type(topic):
 
-            routing_table = [
-                ("^$", "search"),
-                ("^[^/]*/rosetta(/|$)", "rosetta"),
-                ("^:", "internal"),
-                ("/:list$", "internal"),
-                ("/$", "cheat.sheets dir"),
-                ("", "cheat.sheets"),
-                ("", "cheat"),
-                ("", "tldr"),
-                ("", "late.nz"),
-                ("", "fosdem"),
-                ("^[/]*$", "unknown"),
-                ("", "learnxiny"),
-                ("^[a-z][a-z]-[a-z][a-z]$", "translation"),
-            ]
-
-            for regexp, route in routing_table:
+            for regexp, route in self.routing_table:
                 if re.search(regexp, topic):
                     if route in self._adapter:
                         if self._adapter[route].is_found(topic):
                             return route
                     else:
                         return route
-
             return 'question'
 
         if topic not in self._cached_topic_type:
             self._cached_topic_type[topic] = __get_topic_type(topic)
         return self._cached_topic_type[topic]
 
-    def get_page_dict(self, query, request_options=None):
+    def _get_page_dict(self, query, request_options=None):
         """
         Return answer_dict for the `query`.
         """
@@ -138,162 +126,52 @@ class Router(object):
         return self._adapter[topic_type]\
                .get_page_dict(query, request_options=request_options)
 
-if os.environ.get('REDIS_HOST', '').lower() != 'none':
-    REDIS = redis.StrictRedis(host=REDISHOST, port=6379, db=0)
-else:
-    REDIS = None
-
-_ROUTER = Router()
-get_topics_list = _ROUTER.get_topics_list
-
-def get_answer(topic, keyword, options="", request_options=None): # pylint: disable=too-many-locals,too-many-branches,too-many-statements
-    """
-    Find cheat sheet for the topic.
-    If `keyword` is None or rempty, return the whole answer.
-    Otherwise cut the paragraphs containing keywords.
-
-    Args:
-        topic (str):    the name of the topic of the cheat sheet
-        keyword (str):  the name of the keywords to search in the cheat sheets
-
-    Returns:
-        string:         the cheat sheet
-    """
-
-    def _join_paragraphs(paragraphs):
-        answer = "\n".join(paragraphs)
-        return answer
-
-    def _split_paragraphs(text):
-        answer = []
-        paragraph = ""
-        for line in text.splitlines():
-            if line == "":
-                answer.append(paragraph)
-                paragraph = ""
-            else:
-                paragraph += line+"\n"
-        answer.append(paragraph)
-        return answer
-
-    def _paragraph_contains(paragraph, keyword, insensitive=False, word_boundaries=True):
+    def get_answer_dict(self, topic, request_options=None):
         """
-        Check if `paragraph` contains `keyword`.
-        Several keywords can be joined together using ~
-        For example: ~ssh~passphrase
+        Find cheat sheet for the topic.
+
+        Args:
+            `topic` (str):    the name of the topic of the cheat sheet
+
+        Returns:
+            answer_dict:      the answer dictionary
         """
-        answer = True
 
-        if '~' in keyword:
-            keywords = keyword.split('~')
-        else:
-            keywords = [keyword]
+        topic_type = self.get_topic_type(topic)
 
-        for kwrd in keywords:
-            regex = re.escape(kwrd)
-            if not word_boundaries:
-                regex = r"\b%s\b" % kwrd
-
-            if insensitive:
-                answer = answer and bool(re.search(regex, paragraph, re.IGNORECASE))
-            else:
-                answer = answer and bool(re.search(regex, paragraph))
-
-        return answer
-
-    def _rewrite_aliases(word):
-        if word == ':bash.completion':
-            return ':bash_completion'
-        return word
-
-    def _rewrite_section_name(query):
-        """
-        """
-        if '/' not in query:
-            return query
-
-        section_name, rest = query.split('/', 1)
-
-        if ':' in section_name:
-            # if ':' is in section_name, it means, that we want to
-            # translate the answer in the specified human language
-            # (experimental)
-            language, section_name = section_name.split(':', 1)
-        else:
-            language = ""
-
-        section_name = LANGUAGE_ALIAS.get(section_name, section_name)
-
-        if language:
-            section_name = language + ":" + section_name
-
-        return "%s/%s" % (section_name, rest)
-
-    def _rewrite_section_name_for_q(query):
-        """
-        FIXME: we rewrite the section name too earlier,
-        what means that we have to use SO names everywhere,
-        where actually canonified internal names shoud be used.
-        After this thing is fixed, we should:
-        * fix naming in cache
-        * fix VIM_NAMES
-        """
-        if '/' not in query:
-            return query
-
-        section_name, rest = query.split('/', 1)
-        if ':' in section_name:
-            section_name = rewrite_editor_section_name(section_name)
-
-        section_name = SO_NAME.get(section_name, section_name)
-        return "%s/%s" % (section_name, rest)
-
-
-    answer = None
-    needs_beautification = False
-
-    topic = _rewrite_aliases(topic)
-    topic = _rewrite_section_name(topic)
-
-    # This is pretty unoptimal, so this part should be rewritten.
-    # For the most queries we could say immediately, # what type the query has.
-    topic_type = _ROUTER.get_topic_type(topic)
-
-    # Checking if the answer is in the cache
-    if topic != "":
-        # Temporary hack for "questions": # the topic name has to be prefixed with `q:`
-        # so we can later delete them from REDIS.
-        # And we known that they need beautification
+        # 'question' queries are pretty expensive, that's why they should be handled
+        # in a special way:
+        # we do not drop the old style cache entries and try to reuse them if possible
         if topic_type == 'question':
-            topic = _rewrite_section_name_for_q(topic)
-            topic = "q:" + topic
-            needs_beautification = True
+            answer = cache.get('q:' + topic)
+            if answer:
+                if isinstance(answer, dict):
+                    return answer
+                return {
+                    'topic': topic,
+                    'topic_type': 'question',
+                    'answer': answer,
+                    'format': 'text+code',
+                    }
 
-        if REDIS:
-            answer = REDIS.get(topic)
-        if answer:
-            answer = answer.decode('utf-8')
+            answer = self._get_page_dict(topic, request_options=request_options)
+            cache.put('q:' + topic, answer)
+            return answer
 
-    # If answer was not found in the cache, try to find it in one of the repositories
-    if not answer:
-        answer = _ROUTER.get_page_dict(topic, request_options=request_options)
+        # Try to find cacheable queries in the cache.
+        # If answer was not found in the cache, resolve it in a normal way and save in the cache
+        cache_needed = self._adapter[topic_type].is_cache_needed()
+        if cache_needed:
+            answer = cache.get(topic)
+            if not isinstance(answer, dict):
+                answer = None
+            if answer:
+                return answer
 
-        # saving answers in the cache
-        if REDIS:
-            if answer and answer['topic_type'] not in ["search", "internal", "unknown"]:
-                REDIS.set(topic, answer)
+        answer = self._get_page_dict(topic, request_options=request_options)
 
-    if needs_beautification:
-        filetype = 'bash'
-        if '/' in topic:
-            filetype = topic.split('/', 1)[0]
-            if filetype.startswith('q:'):
-                filetype = filetype[2:]
-
-        answer['answer'] = fmt.comments.beautify(
-            answer['answer'].encode('utf-8'), filetype, request_options)
-
-    if not keyword:
+        if cache_needed and answer:
+            cache.put(topic, answer)
         return answer
 
 # pylint: disable=invalid-name