mirror of
https://github.com/chubin/cheat.sh.git
synced 2026-06-20 13:16:44 +02:00
added lib/beautifier.py
This commit is contained in:
@@ -0,0 +1,307 @@
|
||||
"""
|
||||
Extract text from the text-code stream and comment it.
|
||||
|
||||
Supports three modes of normalization and commenting:
|
||||
|
||||
1. Don't add any comments
|
||||
2. Add comments
|
||||
3. Remove text, leave code only
|
||||
|
||||
Since several operations are quite expensive,
|
||||
actively uses caching.
|
||||
|
||||
Exported functions:
|
||||
|
||||
beautify(text, lang, options)
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import textwrap
|
||||
import subprocess
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from itertools import groupby, chain
|
||||
from tempfile import NamedTemporaryFile
|
||||
|
||||
import redis
|
||||
|
||||
# The project-local imports below must come after the sys.path tweak,
# hence the temporarily disabled import-order checks.
# pylint: disable=wrong-import-position,wrong-import-order

# NOTE(review): '__file__' is a quoted string literal here, not the module's
# __file__ attribute. Both dirname() calls therefore return '' and abspath('')
# resolves to the current working directory, so MYDIR depends on where the
# process was started. Confirm whether the unquoted __file__ was intended.
MYDIR = os.path.abspath(os.path.dirname(os.path.dirname('__file__')))

# Make the modules in <MYDIR>/lib/ importable by their bare names.
sys.path.append("%s/lib/" % MYDIR)
from languages_data import VIM_NAME
from globals import PATH_VIM_ENVIRONMENT
# pylint: enable=wrong-import-position,wrong-import-order

# Redis connection (local server, database 1) used by beautify() as its cache.
REDIS = redis.StrictRedis(host='localhost', port=6379, db=1)

# Write-only handle on the null device; _run_vim_script() passes it to the
# Vim subprocess as stdout/stderr. It is opened once at import time and is
# not closed anywhere in this module.
FNULL = open(os.devnull, 'w')
|
||||
|
||||
def _language_name(name):
    """
    Translate `name` through the VIM_NAME table.

    Returns the table entry for `name`; if the table has no entry,
    `name` itself is returned unchanged.
    """
    vim_name = VIM_NAME.get(name, name)
    return vim_name
|
||||
|
||||
def _cleanup_lines(lines):
|
||||
"""
|
||||
Cleanup `lines` a little bit: remove empty lines at the beginning
|
||||
and at the end; remove to much empty lines in between.
|
||||
"""
|
||||
|
||||
if lines == []:
|
||||
return lines
|
||||
|
||||
# remove empty lines from the beginning
|
||||
start = 0
|
||||
while start < len(lines) and lines[start].strip() == '':
|
||||
start += 1
|
||||
lines = lines[start:]
|
||||
if lines == []:
|
||||
return lines
|
||||
|
||||
# remove empty lines from the end
|
||||
end = len(lines) - 1
|
||||
while end >= 0 and lines[end].strip() == '':
|
||||
end -= 1
|
||||
lines = lines[:end+1]
|
||||
if lines == []:
|
||||
return lines
|
||||
|
||||
# remove repeating empty lines
|
||||
lines = list(chain.from_iterable(
|
||||
[(list(x[1]) if x[0] else [''])
|
||||
for x in groupby(lines, key=lambda x: x.strip() != '')]))
|
||||
|
||||
return lines
|
||||
|
||||
|
||||
def _classify_lines(lines):
|
||||
"""
|
||||
Classify each line and say which of them
|
||||
are text (0) and which of them are code (1).
|
||||
|
||||
A line is considered to be code,
|
||||
if it starts with four spaces.
|
||||
|
||||
A line is considerer to be text if it is not
|
||||
empty and is not code.
|
||||
|
||||
If line is empty, it is considered to be
|
||||
code if it surrounded but two other code lines
|
||||
(or if it is the first/last line and it has
|
||||
code on the other side.
|
||||
"""
|
||||
|
||||
def _line_type(line):
|
||||
if line.strip() == '':
|
||||
return -1
|
||||
|
||||
# some line may start with spaces but still be not code.
|
||||
# we need some heuristics here, but for the moment just
|
||||
# whitelist such cases:
|
||||
if line.strip().startswith('* ') or re.match(r'[0-9]+\.', line.strip()):
|
||||
return 0
|
||||
|
||||
if line.startswith(' '):
|
||||
return 1
|
||||
return 0
|
||||
|
||||
line_types = [_line_type(line) for line in lines]
|
||||
|
||||
# pass 2:
|
||||
# adding empty code lines to the code
|
||||
for i in range(len(line_types) - 1):
|
||||
if line_types[i] == 1 and line_types[i+1] == -1:
|
||||
line_types[i+1] = -2
|
||||
changed = True
|
||||
|
||||
for i in range(len(line_types) - 1)[::-1]:
|
||||
if line_types[i] == -1 and line_types[i+1] == 1:
|
||||
line_types[i] = -2
|
||||
changed = True
|
||||
line_types = [1 if x == -2 else x for x in line_types]
|
||||
|
||||
# pass 3:
|
||||
# fixing undefined line types (-1)
|
||||
changed = True
|
||||
while changed:
|
||||
changed = False
|
||||
|
||||
# changing all lines types that are near the text
|
||||
|
||||
for i in range(len(line_types) - 1):
|
||||
if line_types[i] == 0 and line_types[i+1] == -1:
|
||||
line_types[i+1] = 0
|
||||
changed = True
|
||||
|
||||
for i in range(len(line_types) - 1)[::-1]:
|
||||
if line_types[i] == -1 and line_types[i+1] == 0:
|
||||
line_types[i] = 0
|
||||
changed = True
|
||||
|
||||
# everything what is still undefined, change to 1
|
||||
line_types = [1 if x == -1 else x for x in line_types]
|
||||
return line_types
|
||||
|
||||
def _wrap_lines(lines_classes, shift_code=False):
|
||||
"""
|
||||
Wrap classified lines. Add the splitted lines to the stream.
|
||||
If `shift_code` is True, remove leading four spaces.
|
||||
"""
|
||||
|
||||
def _shift_code(line, shift=0):
|
||||
#if line.startswith(' '):
|
||||
# return line[4:]
|
||||
|
||||
if shift == 1 and line != '':
|
||||
return ' ' + line
|
||||
|
||||
if shift == 3:
|
||||
if line.startswith(' '):
|
||||
return line[3:]
|
||||
|
||||
return line
|
||||
|
||||
result = []
|
||||
for line_tuple in lines_classes:
|
||||
if line_tuple[0] == 1:
|
||||
if shift_code:
|
||||
shift = 3
|
||||
else:
|
||||
shift = -1
|
||||
result.append((line_tuple[0], _shift_code(line_tuple[1], shift=shift)))
|
||||
else:
|
||||
if line_tuple[1].strip() == "":
|
||||
result.append((line_tuple[0], ""))
|
||||
for line in textwrap.fill(line_tuple[1]).splitlines():
|
||||
result.append((line_tuple[0], line))
|
||||
|
||||
return result
|
||||
|
||||
def _run_vim_script(script_lines, text_lines):
    """
    Apply the Vim script `script_lines` to the text `text_lines`
    and return the resulting text.

    Both arguments are lists of strings; they are joined with newlines
    and written into two temporary files. Vim is then started on the
    text file with the script file sourced (`vim -S`), wrapped into
    `script -q -c` (presumably to provide Vim with a terminal; confirm).
    HOME is set to PATH_VIM_ENVIRONMENT for the subprocess, and its
    stdout and stderr are discarded.

    Returns the content of the text file after Vim has exited.
    """

    # The wrappers only reserve the names here; the content is written
    # through separate handles below. They are created with delete=True
    # and are never closed explicitly in this function.
    script_vim = NamedTemporaryFile(delete=True)
    textfile = NamedTemporaryFile(delete=True)

    # Context managers guarantee that the data is flushed to disk
    # before Vim is started, independently of garbage collection.
    with open(script_vim.name, "w") as script_handle:
        script_handle.write("\n".join(script_lines))
    with open(textfile.name, "w") as text_handle:
        text_handle.write("\n".join(text_lines))

    # Close only the underlying file objects, not the wrappers.
    script_vim.file.close()
    textfile.file.close()

    my_env = os.environ.copy()
    my_env['HOME'] = PATH_VIM_ENVIRONMENT

    cmd = ["script", "-q", "-c",
           "vim -S %s %s" % (script_vim.name, textfile.name)]
    subprocess.Popen(cmd, shell=False, stdout=FNULL, stderr=FNULL, env=my_env).communicate()

    with open(textfile.name, "r") as result_handle:
        return result_handle.read()
|
||||
|
||||
def _commenting_script(lines_blocks, filetype):
    """
    Build the Vim script that turns the text blocks into comments.

    `lines_blocks` is an iterable of (block_type, lines) pairs
    describing consecutive blocks of the text, numbered from line 1.
    For every block of type 0 (text) two NERDComment calls over the
    line range of the block are generated: 'uncomment' first, then
    'comment' for a single-line block or 'sexy' for a longer one.

    The commands of later blocks are placed before the commands of
    earlier blocks (presumably so that lines added by commenting do not
    invalidate the line numbers of the blocks still to be processed;
    confirm).

    The script starts with `set ft=<filetype>`, where the file type
    is translated by _language_name(), and ends with `wq`.

    Returns the script as a list of lines.
    """

    commands = []
    first_line = 1
    for block in lines_blocks:
        block_type, members = block[0], block[1]
        last_line = first_line + len(list(members)) - 1

        if block_type == 0:
            if last_line - first_line < 1:
                style = 'comment'
            else:
                style = 'sexy'

            # prepend the pair: uncomment first, then the comment call
            commands[:0] = [
                "%s,%s call NERDComment(1, 'uncomment')" % (first_line, last_line),
                "%s,%s call NERDComment(1, '%s')" % (first_line, last_line, style),
            ]

        first_line = last_line + 1

    header = ["set ft=%s" % _language_name(filetype)]
    return header + commands + ["wq"]
|
||||
|
||||
def _beautify(text, filetype, add_comments=False, remove_text=False):
    """
    Do the actual beautification of `text` and return the result.

    The text is split into lines, cleaned up, classified into
    text and code lines, and wrapped. After that:

    * if `remove_text` is set, only the code lines are returned
      (cleaned up once again, terminated with a newline);
    * otherwise, if `add_comments` is not set, all wrapped lines
      are returned joined with newlines;
    * otherwise the text blocks are converted into comments
      of the language `filetype` by means of a Vim script.

    `remove_text` has priority over `add_comments`.
    """

    # The code is shifted if and only if the text is either converted into
    # comments or removed completely. Otherwise the code has to remain aligned
    shift_code = add_comments or remove_text

    cleaned = _cleanup_lines([x.rstrip('\n') for x in text.splitlines()])
    classified = _wrap_lines(zip(_classify_lines(cleaned), cleaned),
                             shift_code=shift_code)

    if remove_text:
        code_only = _cleanup_lines(
            [content for kind, content in classified if kind == 1])
        output = "\n".join(code_only)
        if output.endswith('\n'):
            return output
        return output + "\n"

    if not add_comments:
        return "\n".join(content for _, content in classified)

    blocks = groupby(classified, key=lambda pair: pair[0])
    script = _commenting_script(blocks, filetype)
    return _run_vim_script(script, [content for _, content in classified])
|
||||
|
||||
def beautify(text, lang, options):
    """
    Process input `text` according to the specified `options`.
    Adds comments if needed, according to the `lang` rules.
    Caches the results.
    The whole work (except caching) is done by _beautify().

    `options` is a dictionary (or None); only its keys
    'add_comments' and 'remove_text' are taken into account.
    If none of them is present, `text` is returned unchanged
    and the cache is not consulted.

    Returns the beautified text.
    """

    options = options or {}
    beauty_options = dict((k, v) for k, v in options.items()
                          if k in ('add_comments', 'remove_text'))

    if beauty_options == {}:
        # if mode is unknown, just don't transform the text at all
        return text

    mode = ''
    if beauty_options.get('add_comments'):
        mode += 'c'
    if beauty_options.get('remove_text'):
        mode += 'q'

    # md5() needs bytes: text strings are encoded first, byte strings are
    # hashed as they are (so the keys of pure byte-string input do not change)
    raw_text = text if isinstance(text, bytes) else text.encode('utf-8')
    digest = "t:%s:%s:%s" % (hashlib.md5(raw_text).hexdigest(), lang, mode)

    answer = REDIS.get(digest)
    if answer:
        # Where bytes and str are different types, the cached reply is
        # decoded, so that cached and fresh answers have the same type.
        if isinstance(answer, bytes) and not isinstance(answer, str):
            answer = answer.decode('utf-8')
        return answer

    answer = _beautify(text, lang, **beauty_options)

    REDIS.set(digest, answer)
    return answer
|
||||
|
||||
def __main__():
    """
    Command line entry point.

    Reads the text from stdin, takes the language from the first
    command line argument and the mode from the second one
    ("", "c", "C" or "q"), and writes the beautified text to stdout.
    """
    source_text = sys.stdin.read()
    lang = sys.argv[1]

    # mode letter -> options for beautify()
    mode_options = {
        "": {},
        "c": {"add_comments": True},
        "C": {"add_comments": False},
        "q": {"remove_text": True},
    }
    selected = mode_options[sys.argv[2]]

    sys.stdout.write(beautify(source_text, lang, selected))


if __name__ == '__main__':
    __main__()
|
||||
Reference in New Issue
Block a user