6.6. qpretty.python.l
A pretty printer for Python.
%{
#include <iostream>
#include <string>
#include <cstring>
#include "qweave.hh"
static const string *src; // Points to the raw source code; set by pretty_print_python() on each call.
static unsigned int src_index; // Index into the raw source; reset to 0 on each call.
static string s; // The TeX output (HTML when use_html is set), built up by the scanner rules.
Keywords used in Python. The typecodes are all `1' because we don't need
that feature.
// Reserved words of Python.  Every entry carries typecode 1, which the
// identifier rule renders with STYLE_KEYWORD.  The table is not sorted
// and ends with a { 0, 0 } entry.
//
// `import' is listed even though a dedicated rule handles it at the
// start of a line: that rule is anchored to column 0, so an indented
// `import' or the one in `from x import y' reaches the identifier rule
// and must be found here.
static Word py_words[] =
{
  // Simple statements.
  { "assert", 1 },
  { "break", 1 },
  { "continue", 1 },
  { "del", 1 },
  { "exec", 1 },
  { "from", 1 },
  { "global", 1 },
  { "import", 1 },
  { "pass", 1 },
  { "print", 1 },
  { "raise", 1 },
  { "return", 1 },
  { "yield", 1 },
  // Compound statements.
  { "class", 1 },
  { "def", 1 },
  { "elif", 1 },
  { "else", 1 },
  { "except", 1 },
  { "finally", 1 },
  { "for", 1 },
  { "if", 1 },
  { "try", 1 },
  { "while", 1 },
  { "with", 1 },
  // Expression keywords and operators.
  { "and", 1 },
  { "as", 1 },
  { "in", 1 },
  { "is", 1 },
  { "lambda", 1 },
  { "not", 1 },
  { "or", 1 },
  { 0, 0 }
};
%}
%option yylineno
%option stack
%option noyywrap
%option never-interactive
%s COMLINE
%s STRINGSNG
%s STRINGDBL
%x STRESC
%x NOTIDENT
%%
Match the start of comments.
<INITIAL>"#" {
  // A hash mark opens a comment; the COMLINE rules take over from here.
  // Only TeX needs the hash escaped with a backslash; HTML takes it
  // verbatim (the same split the underscore rule makes).
  s += style_start (STYLE_COMMENT);
  s += use_html ? "#" : "\\#";
  BEGIN (COMLINE);
}
Match the start of string literals.
<INITIAL>\" {
  // Opening double quote: switch to STRINGDBL, open the string-literal
  // style and copy the quote itself.
  BEGIN (STRINGDBL);
  s += style_start (STYLE_STRING_LIT);
  s += '"';
}
<INITIAL>' {
  // Opening single quote: same as above, but the literal is closed by
  // the STRINGSNG rules.
  BEGIN (STRINGSNG);
  s += style_start (STYLE_STRING_LIT);
  s += '\'';
}
Match operators which we don't do anything interesting with.
<INITIAL>(\+|-|\*|\/|%|&&|\|\||&|\||^|<<|>>)= {
  // Augmented assignment: pass the whole operator through format_text.
  // NOTE(review): the list looks inherited from a C scanner -- `&&='
  // and `||=' are not Python operators, while `**=' and `//=' are only
  // matched piecewise (one character, then this rule).  Confirm whether
  // that is intended.
  s.append (format_text (yypytext, true));
}
Match operators which we display in more interesting ways.
<INITIAL>!= {
  // Not-equal.
  if (use_html)
    s += "≠";
  else
    s += "$\\ne$";
}
<INITIAL>== {
  // Equality is shown as typed; TeX output just wraps it in math mode.
  if (use_html)
    s += "==";
  else
    s += "$==$";
}
<INITIAL>\>= {
  // Greater-or-equal.
  if (use_html)
    s += "≥";
  else
    s += "$\\geq$";
}
<INITIAL>\<= {
  // Less-or-equal.
  if (use_html)
    s += "≤";
  else
    s += "$\\leq$";
}
<INITIAL>= {
  // Plain assignment is copied unchanged (an arrow rendering was once
  // tried here and is disabled).
  s += "=";
}
Match keywords.
<INITIAL>[[:alpha:]_][[:alnum:]_]* {
  // Look the word up in py_words; its typecode selects the style.
  switch (find_word (py_words, yypytext))
    {
    case 1:
      s += style_text (STYLE_KEYWORD, yypytext);
      break;
    case 2:
      s += style_text (STYLE_CONSTANT, yypytext);
      break;
    case 3:
      s += style_text (STYLE_TYPEWORD, yypytext);
      break;
    default:
      // Not a known word: push the whole match back and rescan it
      // under the NOTIDENT rules.
      yyless (0);
      BEGIN (NOTIDENT);
      break;
    }
}
Match the `import' statement.
<INITIAL>^import[ \t]+[^ \t\n\r\v\f]+ {
  // The match is the word "import", a run of blanks, then one
  // blank-free token naming what is imported.
  s += style_start (STYLE_PREPROC);
  s += "import";
  s += style_end (STYLE_PREPROC);

  // Skip the six letters of "import" and copy the blanks unchanged.
  char *cursor = yypytext + 6;
  while (isspace (*cursor))
    {
      s += *cursor;
      ++cursor;
    }

  // Whatever remains is the imported name.
  s += style_start (STYLE_INCLUDE_FILE);
  s += format_text (cursor, true);
  s += style_end (STYLE_INCLUDE_FILE);
}
Process comments.
<COMLINE>\n {
  // End of line ends the comment.  The newline is pushed back so the
  // INITIAL rules decide how to render it.
  yyless (0);
  BEGIN (INITIAL);
  s += style_end (STYLE_COMMENT);
}
<COMLINE>(FIXME|TODO|XXX) {
  // Attention markers get their own highlight inside the comment.
  s.append (style_start (STYLE_COMMENT_HL));
  s.append (yypytext);
  s.append (style_end (STYLE_COMMENT_HL));
}
<COMLINE>. {
  // Any other comment character goes through format_text.
  string escaped;
  format_text (yypytext[0], escaped, true);
  s.append (escaped);
}
<COMLINE><<EOF>> {
  // Input ended inside a comment: close the style before stopping.
  s += style_end (STYLE_COMMENT);
  return 0;
}
Process string literals.
<STRINGSNG,STRINGDBL>\\ {
  // A backslash starts an escape sequence.  The current string state
  // is pushed so the STRESC rules can return to it.
  yy_push_state (STRESC);
  s += style_start (STYLE_STRING_ESC);
  if (use_html)
    s += "\\";
  else
    s += "$\\backslash$";
}
<STRINGDBL>\" {
  // Closing double quote.
  BEGIN (INITIAL);
  s += '"';
  s += style_end (STYLE_STRING_LIT);
}
<STRINGSNG>' {
  // Closing single quote.
  BEGIN (INITIAL);
  s += '\'';
  s += style_end (STYLE_STRING_LIT);
}
Process escape sequences in string literals.
<STRESC>\n {
  // Backslash-newline (a line continuation inside the literal).  `.'
  // does not match a newline and STRESC is exclusive, so without this
  // rule flex's default action would echo the newline to yyout and
  // leave the scanner in STRESC with the escape style still open.
  s += style_end (STYLE_STRING_ESC);
  s += '\n';
  yy_pop_state ();
}
<STRESC>. {
  // The character after the backslash completes the escape sequence;
  // emit it, close the escape style and return to the string state.
  string se;
  format_text (yypytext[0], se, true);
  s += se;
  s += style_end (STYLE_STRING_ESC);
  yy_pop_state ();
}
<STRESC><<EOF>> {
  // Input ended right after a backslash: close both open styles.
  cerr << "warning: EOF in string or char literal.\n";
  s += style_end (STYLE_STRING_ESC);
  s += style_end (STYLE_STRING_LIT);
  // Drop the state pushed on entry so the start-condition stack does
  // not keep growing from one run of the scanner to the next.
  yy_pop_state ();
  return 0;
}
Match other random things.
<*>_ { s += use_html ? "_" : "\\_"; /* FIXME */ }
. {
  // Fallback for the inclusive start conditions: any single character
  // (except newline) that no earlier rule claimed goes through
  // format_text.  A separate `.' rule limited to INITIAL and the string
  // states used to follow; it could never match after this one and has
  // been dropped.
  string se;
  format_text (yypytext[0], se, true);
  s += se;
}
<INITIAL>\n\n { s += use_html ? "\n\n" : "\n\\medskip\n"; }
<NOTIDENT>[[:alnum:]] { s += yypytext[0]; }
<NOTIDENT>.|\n {
  // First character that cannot belong to the identifier: push it back
  // and resume normal scanning.
  yyless (0);
  BEGIN (INITIAL);
}
<INITIAL,STRINGSNG,STRINGDBL>\n { s += '\n'; }
<STRINGSNG,STRINGDBL><<EOF>> {
  // Input ended inside a string literal: close the style opened by the
  // opening quote so the output markup stays balanced, as the comment
  // and escape EOF rules do for their styles.
  s += style_end (STYLE_STRING_LIT);
  return 0;
}
<INITIAL,NOTIDENT><<EOF>> {
  return 0;
}
%%
This is the function called by `qweave' when it has something for us
to process.
// Run the Python scanner over `source' and return the formatted text.
//
// The result is a reference to the file-static buffer `s', which is
// emptied at the start of every call; copy it if it must outlive the
// next call.
const string &
pretty_print_python (const string &source)
{
  // Start from a clean slate: empty output, scanner in INITIAL.
  s.clear ();
  BEGIN (INITIAL);

  // Record the input and rewind the read position.
  // NOTE(review): nothing in this part of the file reads src or
  // src_index; presumably an input hook defined elsewhere does --
  // confirm.
  src_index = 0;
  src = &source;

  yylex ();
  return s;
}