From 80ded6938c72c9bcb0b9ec026fe8c7fcb9bc65cf Mon Sep 17 00:00:00 2001 From: Igor Chubin Date: Wed, 5 Aug 2020 09:46:36 +0200 Subject: [PATCH] fixed search (#147, #225) --- lib/postprocessing.py | 38 ++------------------------- lib/search.py | 61 ++++++++++++++++++++++++++++++++++++++----- 2 files changed, 57 insertions(+), 42 deletions(-) diff --git a/lib/postprocessing.py b/lib/postprocessing.py index d54a359..bc82c1e 100644 --- a/lib/postprocessing.py +++ b/lib/postprocessing.py @@ -1,4 +1,4 @@ -import re +import search import fmt.comments def postprocess(answer, keyword, options, request_options=None): @@ -49,42 +49,8 @@ def _filter_by_keyword(answer, keyword, options): answer.append(paragraph) return answer - def _paragraph_contains(paragraph, keyword, insensitive=False, word_boundaries=True): - """ - Check if `paragraph` contains `keyword`. - Several keywords can be joined together using ~ - For example: ~ssh~passphrase - """ - answer = True - - if '~' in keyword: - keywords = keyword.split('~') - else: - keywords = [keyword] - - for kwrd in keywords: - regex = re.escape(kwrd) - if not word_boundaries: - regex = r"\b%s\b" % kwrd - - if insensitive: - answer = answer and bool(re.search(regex, paragraph, re.IGNORECASE)) - else: - answer = answer and bool(re.search(regex, paragraph)) - - return answer - - - if not keyword: - return answer - - search_options = { - 'insensitive': 'i' in options, - 'word_boundaries': 'b' in options - } - paragraphs = [p for p in _split_paragraphs(answer) - if _paragraph_contains(p, keyword, **search_options)] + if search.match(p, keyword, options=options)] if not paragraphs: return "" diff --git a/lib/search.py b/lib/search.py index 9b0bfed..58e205c 100644 --- a/lib/search.py +++ b/lib/search.py @@ -19,6 +19,8 @@ Configuration parameters: search.limit """ +import re + from config import CONFIG from routing import get_answer_dict, get_topics_list @@ -30,13 +32,60 @@ def _limited_entry(): 'format': "code", } +def 
_parse_options(options): + """Parse search options string into options_dict + """ + + if options is None: + return {} + + search_options = { + 'insensitive': 'i' in options, + 'word_boundaries': 'b' in options, + 'recursive': 'r' in options, + } + return search_options + +def match(paragraph, keyword, options=None, options_dict=None): + """Search for each keyword from `keyword` in `paragraph` + and if all of them are found, return `True`. + Otherwise return `False`. + + Several keywords can be joined together using ~ + For example: ~ssh~passphrase + """ + + if '~' in keyword: + keywords = keyword.split('~') + else: + keywords = [keyword] + + if options_dict is None: + options_dict = _parse_options(options) + + for kwrd in keywords: + if not kwrd: + continue + + regex = re.escape(kwrd) + if options_dict["word_boundaries"]: + regex = r"\b%s\b" % kwrd + + if options_dict["insensitive"]: + if not re.search(regex, paragraph, re.IGNORECASE): + return False + else: + if not re.search(regex, paragraph): + return False + return True + def find_answers_by_keyword(directory, keyword, options="", request_options=None): """ Search in the whole tree of all cheatsheets or in its subtree `directory` by `keyword` """ - recursive = 'r' in options + options_dict = _parse_options(options) answers_found = [] for topic in get_topics_list(skip_internal=True, skip_dirs=True): @@ -45,13 +94,13 @@ def find_answers_by_keyword(directory, keyword, options="", request_options=None continue subtopic = topic[len(directory):] - if not recursive and '/' in subtopic: + if not options_dict["recursive"] and '/' in subtopic: continue - answer = get_answer_dict(topic, request_options=request_options) - - if answer and answer.get('answer') and keyword.lower() in answer.get('answer', '').lower(): - answers_found.append(answer) + answer_dict = get_answer_dict(topic, request_options=request_options) + answer_text = answer_dict.get('answer', '') + if match(answer_text, keyword, options_dict=options_dict): +
answers_found.append(answer_dict) if len(answers_found) > CONFIG['search.limit']: answers_found.append(