343 lines
13 KiB
Python
343 lines
13 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
| This file is part of the web2py Web Framework
|
|
| Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu>
|
|
| License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)
|
|
"""
|
|
from __future__ import print_function
|
|
from gluon._compat import xrange
|
|
from gluon.utils import local_html_escape
|
|
import re
|
|
|
|
# Public API of this module: only the highlight() function is exported.
__all__ = ['highlight']
|
|
|
|
|
|
class Highlighter(object):
    """Regex-driven syntax highlighter that emits inline-styled HTML.

    The text is scanned left to right. At every position the token rules of
    the current mode (see ``all_styles``) are tried in order; the first rule
    that matches is handed to the mode's tokenizer callback, which appends
    HTML to ``self.output`` and may return the name of the mode to switch to.
    """

    def __init__(
        self,
        mode,
        link=None,
        styles=None,
    ):
        """
        Initialize highlighter:
            mode = language (PYTHON, WEB2PY, C, CPP, HTML, HTML_PLAIN),
                   case-insensitive
            link = base URL used for the links generated around WEB2PY
                   tokens; a trailing '/' is appended when missing
            styles = optional dict mapping a token name to the CSS style
                   that replaces the built-in one

        Raises SyntaxError when mode is not one of the supported languages.
        """
        styles = styles or {}
        mode = mode.upper()
        if link and link[-1] != '/':
            link = link + '/'
        self.link = link
        self.styles = styles
        self.output = []
        self.span_style = None
        # Delimiter (''' or """) of the multi-line string being scanned.
        # Initialised here so the attribute always exists.
        self.strMultilineString = ''
        # Some modes are aliases of a base mode and differ only in which
        # tokens are switched off.
        if mode == 'WEB2PY':
            (mode, self.suppress_tokens) = ('PYTHON', [])
        elif mode == 'PYTHON':
            self.suppress_tokens = ['GOTOHTML']
        elif mode == 'CPP':
            (mode, self.suppress_tokens) = ('C', [])
        elif mode == 'C':
            self.suppress_tokens = ['CPPKEYWORD']
        elif mode == 'HTML_PLAIN':
            (mode, self.suppress_tokens) = ('HTML', ['GOTOPYTHON'])
        elif mode == 'HTML':
            self.suppress_tokens = []
        else:
            raise SyntaxError('Unknown mode: %s' % mode)
        self.mode = mode

    def c_tokenizer(
        self,
        token,
        match,
        style,
    ):
        """
        Callback for C specific highlighting.

        Appends the escaped matched text in the requested style. Returns
        None, i.e. never switches mode.
        """

        value = local_html_escape(match.group(), quote=False)
        self.change_style(token, style)
        self.output.append(value)

    def python_tokenizer(
        self,
        token,
        match,
        style,
    ):
        """
        Callback for python specific highlighting.

        Returns the name of the mode to switch to, or None to stay in the
        current mode.
        """

        value = local_html_escape(match.group(), quote=False)
        if token == 'MULTILINESTRING':
            self.change_style(token, style)
            self.output.append(value)
            # Remember which delimiter opened the string so that only the
            # same delimiter can close it.
            self.strMultilineString = match.group(1)
            return 'PYTHONMultilineString'
        elif token == 'ENDMULTILINESTRING':
            if match.group(1) == self.strMultilineString:
                self.output.append(value)
                self.strMultilineString = ''
                return 'PYTHON'
            # Other delimiter: fall through, emit the text and stay inside
            # the multi-line string mode.
        if style and style[:5] == 'link:':
            # Style has the form 'link:<base url>;<css>'.
            self.change_style(None, None)
            (url, style) = style[5:].split(';', 1)
            if url == 'None' or url == '':
                self.output.append('<span style="%s">%s</span>'
                                   % (style, value))
            else:
                self.output.append('<a href="%s%s" style="%s">%s</a>'
                                   % (url, value, style, value))
        else:
            self.change_style(token, style)
            self.output.append(value)
        if token == 'GOTOHTML':
            return 'HTML'
        return None

    def html_tokenizer(
        self,
        token,
        match,
        style,
    ):
        """
        Callback for HTML specific highlighting.

        Returns 'PYTHON' after a GOTOPYTHON token, otherwise None.
        """

        value = local_html_escape(match.group(), quote=False)
        self.change_style(token, style)
        self.output.append(value)
        if token == 'GOTOPYTHON':
            return 'PYTHON'
        return None

    # mode name -> (tokenizer callback, ordered (token, regex, style) rules).
    # Rules are tried in order and the first match wins, so their order is
    # significant. The style 'Keep' leaves the currently open span untouched.
    all_styles = {
        'C': (c_tokenizer, (
            ('COMMENT', re.compile(r'//.*\r?\n'),
             'color: green; font-style: italic'),
            ('MULTILINECOMMENT', re.compile(r'/\*.*?\*/', re.DOTALL),
             'color: green; font-style: italic'),
            ('PREPROCESSOR', re.compile(r'\s*#.*?[^\\]\s*\n',
             re.DOTALL), 'color: magenta; font-style: italic'),
            ('PUNC', re.compile(r'[-+*!&|^~/%\=<>\[\]{}(),.:]'),
             'font-weight: bold'),
            ('NUMBER',
             re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'),
             'color: red'),
            ('KEYWORD', re.compile(r'(sizeof|int|long|short|char|void|'
                                   r'signed|unsigned|float|double|'
                                   r'goto|break|return|continue|asm|'
                                   r'case|default|if|else|switch|while|for|do|'
                                   r'struct|union|enum|typedef|'
                                   r'static|register|auto|volatile|extern|const)(?![a-zA-Z0-9_])'),
             'color:#185369; font-weight: bold'),
            ('CPPKEYWORD',
             re.compile(r'(class|private|protected|public|template|new|delete|'
                        r'this|friend|using|inline|export|bool|throw|try|catch|'
                        r'operator|typeid|virtual)(?![a-zA-Z0-9_])'),
             'color: blue; font-weight: bold'),
            ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'),
             'color: #FF9966'),
            ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'),
             None),
            ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
        )),
        'PYTHON': (python_tokenizer, (
            ('GOTOHTML', re.compile(r'\}\}'), 'color: red'),
            ('PUNC', re.compile(r'[-+*!|&^~/%\=<>\[\]{}(),.:]'),
             'font-weight: bold'),
            ('NUMBER',
             re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'
                        ), 'color: red'),
            ('KEYWORD',
             re.compile(r'(def|class|break|continue|del|exec|finally|pass|'
                        r'print|raise|return|try|except|global|assert|lambda|'
                        r'yield|for|while|if|elif|else|and|in|is|not|or|import|'
                        r'from|True|False)(?![a-zA-Z0-9_])'),
             'color:#185369; font-weight: bold'),
            ('WEB2PY',
             re.compile(r'(request|response|session|cache|redirect|local_import|HTTP|TR|XML|URL|BEAUTIFY|A|BODY|BR|B|CAT|CENTER|CODE|COL|COLGROUP|DIV|EM|EMBED|FIELDSET|LEGEND|FORM|H1|H2|H3|H4|H5|H6|IFRAME|HEAD|HR|HTML|I|IMG|INPUT|LABEL|LI|LINK|MARKMIN|MENU|META|OBJECT|OL|ON|OPTION|P|PRE|SCRIPT|SELECT|SPAN|STYLE|TABLE|THEAD|TBODY|TFOOT|TAG|TD|TEXTAREA|TH|TITLE|TT|T|UL|XHTML|IS_SLUG|IS_STRONG|IS_LOWER|IS_UPPER|IS_ALPHANUMERIC|IS_DATETIME|IS_DATETIME_IN_RANGE|IS_DATE|IS_DATE_IN_RANGE|IS_DECIMAL_IN_RANGE|IS_EMAIL|IS_EXPR|IS_FLOAT_IN_RANGE|IS_IMAGE|IS_INT_IN_RANGE|IS_IN_SET|IS_IPV4|IS_LIST_OF|IS_LENGTH|IS_MATCH|IS_EQUAL_TO|IS_EMPTY_OR|IS_NULL_OR|IS_NOT_EMPTY|IS_TIME|IS_UPLOAD_FILENAME|IS_URL|CLEANUP|CRYPT|IS_IN_DB|IS_NOT_IN_DB|DAL|Field|SQLFORM|SQLTABLE|xmlescape|embed64)(?![a-zA-Z0-9_])'
                        ), 'link:%(link)s;text-decoration:None;color:#FF5C1F;'),
            # The negative lookahead stops identifiers that merely start
            # with 'self' or 'None' (e.g. 'selfish') from being split and
            # partially highlighted; this rule runs before IDENTIFIER.
            ('MAGIC', re.compile(r'(self|None)(?![a-zA-Z0-9_])'),
             'color:#185369; font-weight: bold'),
            ('MULTILINESTRING', re.compile(r'r?u?(\'\'\'|""")'),
             'color: #FF9966'),
            ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'
                                  ), 'color: #FF9966'),
            ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'),
             None),
            ('COMMENT', re.compile(r'\#.*\r?\n'),
             'color: green; font-style: italic'),
            ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
        )),
        'PYTHONMultilineString': (python_tokenizer,
                                  (('ENDMULTILINESTRING',
                                    re.compile(r'.*?("""|\'\'\')',
                                               re.DOTALL), 'color: darkred'), )),
        'HTML': (html_tokenizer, (
            ('GOTOPYTHON', re.compile(r'\{\{'), 'color: red'),
            ('COMMENT', re.compile(r'<!--[^>]*-->|<!>'),
             'color: green; font-style: italic'),
            ('XMLCRAP', re.compile(r'<![^>]*>'),
             'color: blue; font-style: italic'),
            ('SCRIPT', re.compile(r'<script .*?</script>', re.IGNORECASE
                                  | re.DOTALL), 'color: black'),
            ('TAG', re.compile(r'</?\s*[a-zA-Z0-9]+'),
             'color: darkred; font-weight: bold'),
            ('ENDTAG', re.compile(r'/?>'),
             'color: darkred; font-weight: bold'),
        )),
    }

    def highlight(self, data):
        """
        Syntax highlight some source text in the mode chosen at construction.

        Returns the html version of the text, with tabs expanded to 4 spaces.
        Output accumulates in self.output, so an instance is meant to be
        used for a single call.
        """

        i = 0
        mode = self.mode
        while i < len(data):
            # Looked up on every pass because a tokenizer may switch mode.
            (tokenizer, rules) = Highlighter.all_styles[mode]
            for (token, o_re, style) in rules:
                if token in self.suppress_tokens:
                    continue
                match = o_re.match(data, i)
                if not match:
                    continue
                if style:
                    # Only the link style has a %(link)s placeholder; the
                    # other styles pass through unchanged.
                    style = style % dict(link=self.link)
                new_mode = tokenizer(self, token, match, style)
                if new_mode is not None:
                    mode = new_mode
                # Always advance, even on an empty match.
                i += max(1, len(match.group()))
                break
            else:
                # No rule matched: copy the character through unstyled.
                self.change_style(None, None)
                self.output.append(data[i])
                i += 1
        self.change_style(None, None)
        return ''.join(self.output).expandtabs(4)

    def change_style(self, token, style):
        """
        Generate output to change from existing style to another style only.

        A style registered for token in self.styles overrides the style
        argument. The style 'Keep' leaves the current span as it is; None
        closes the current span without opening a new one.
        """

        if token in self.styles:
            style = self.styles[token]
        if self.span_style != style:
            if style != 'Keep':
                if self.span_style is not None:
                    self.output.append('</span>')
                if style is not None:
                    self.output.append('<span style="%s">' % style)
                self.span_style = style
|
|
|
|
|
|
def highlight(
    code,
    language,
    link='/examples/globals/vars/',
    counter=1,
    styles=None,
    highlight_line=None,
    context_lines=None,
    attributes=None,
):
    """
    Render code as an HTML table with a line-number column and a code column.

    code = the source text to render
    language = PYTHON, WEB2PY, C, CPP, HTML or HTML_PLAIN (case-insensitive);
        any other value, or None, renders the text escaped but unstyled
    link = base URL handed to the Highlighter for its generated links
    counter = None for empty line labels, a string to use that (escaped)
        string as the label of every line, otherwise the number shown for
        the first line
    styles = optional dict; the keys 'CODE', 'LINENUMBERS' and
        'LINEHIGHLIGHT' replace the default CSS of the code column, the
        number column and the highlighted line; the dict is also passed to
        the Highlighter as per-token style overrides
    highlight_line = line to emphasise; counted on the same scale as a
        numeric counter, otherwise used directly as a 0-based index
    context_lines = with highlight_line, keep only this many lines before
        and after the emphasised line
    attributes = optional dict; keys starting with '_' become attributes of
        the <table> tag (a None value gives a bare attribute, a true value
        gives name="value" with double quotes turned into single quotes)

    Returns the HTML as a string.
    """
    styles = styles or {}
    attributes = attributes or {}
    code_style = styles.get('CODE', None) or '''
font-size: 11px;
font-family: Bitstream Vera Sans Mono,monospace;
background-color: transparent;
margin: 0;
padding: 5px;
border: none;
overflow: auto;
white-space: pre !important;
'''
    linenumbers_style = styles.get('LINENUMBERS', None) or '''
font-size: 11px;
font-family: Bitstream Vera Sans Mono,monospace;
background-color: transparent;
margin: 0;
padding: 5px;
border: none;
color: #A0A0A0;
'''
    linehighlight_style = styles.get('LINEHIGHLIGHT', None) or \
        'background-color: #EBDDE2;'

    # HTML_PLAIN is a mode the Highlighter accepts; it was missing from this
    # list and therefore silently fell back to plain escaping.
    if language and language.upper() in ['PYTHON', 'C', 'CPP', 'HTML',
                                         'HTML_PLAIN', 'WEB2PY']:
        code = Highlighter(language, link, styles).highlight(code)
    else:
        code = local_html_escape(code, quote=False)
    lines = code.split('\n')

    if counter is None:
        linenumbers = [''] * len(lines)
    elif isinstance(counter, str):
        linenumbers = [local_html_escape(counter, quote=False)] * len(lines)
    else:
        linenumbers = [str(i + counter) + '.' for i in
                       xrange(len(lines))]

    if highlight_line:
        if counter and not isinstance(counter, str):
            lineno = highlight_line - counter
        else:
            lineno = highlight_line
        # A negative index (highlight_line below the first line number)
        # would wrap around and mark a line counted from the end, or raise
        # IndexError, so it is treated as "nothing to highlight".
        if lineno >= 0:
            if lineno < len(lines):
                lines[lineno] = '<span style="%s">%s</span>' % (
                    linehighlight_style, lines[lineno])
                linenumbers[lineno] = '<span style="%s">%s</span>' % (
                    linehighlight_style, linenumbers[lineno])

            if context_lines:
                # Trim the tail first so the indices of the head slice are
                # still valid afterwards.
                if lineno + context_lines < len(lines):
                    delslice = slice(lineno + context_lines + 1, len(lines))
                    del lines[delslice]
                    del linenumbers[delslice]
                if lineno - context_lines > 0:
                    delslice = slice(0, lineno - context_lines)
                    del lines[delslice]
                    del linenumbers[delslice]

    code = '<br/>'.join(lines)
    numbers = '<br/>'.join(linenumbers)

    # Bare attributes (value None) come first, then name="value" pairs.
    bare = [key[1:].lower() for (key, value) in attributes.items()
            if key[:1] == '_' and value is None]
    valued = ['%s="%s"' % (key[1:].lower(), str(value).replace('"', "'"))
              for (key, value) in attributes.items()
              if key[:1] == '_' and value]
    fa = ' '.join(bare + valued)
    if fa:
        fa = ' ' + fa
    return '<table%s><tr style="vertical-align:top;">' \
           '<td style="min-width:40px; text-align: right;"><pre style="%s">%s</pre></td>' \
           '<td><pre style="%s">%s</pre></td></tr></table>' % (fa, linenumbers_style, numbers, code_style, code)
|
|
|
|
|
|
if __name__ == '__main__':
    import sys

    # Command-line use: argv[1] is the file to render, argv[2] the language.
    # The context manager closes the file even if reading fails.
    with open(sys.argv[1]) as argfp:
        data = argfp.read()
    print('<html><body>' + highlight(data, sys.argv[2]) + '</body></html>')
|