6.6. qpretty.python.l


6.6.1

A pretty printer for Python.


6.6.2
%{

#include <iostream>
#include <string>
#include <cstring>
#include "qweave.hh"

6.6.3
// The raw source code currently being formatted.  pretty_print_python()
// points this at its argument before every scan.
static const string *src;        // Points to the raw source code.
// Position within *src; reset to 0 before every scan.
// NOTE(review): nothing in the visible rules reads src or src_index --
// presumably an input hook defined elsewhere consumes them; confirm.
static unsigned int src_index;   // Index into the raw source.
// Accumulated formatted output (TeX markup, or HTML when use_html is set).
// Cleared before every scan and handed back to the caller by reference.
static string s;                 // The TeX output.

6.6.4

Keywords used in Python. The typecodes are all `1' because we don't need that feature.

// Keyword table consulted by the identifier rule through find_word().
// Every entry uses typecode 1, which that rule renders with STYLE_KEYWORD.
// The list covers the Python 2 statement keywords (hence `exec' and
// `print'); the { 0, 0 } entry terminates the table.
static Word py_words[] =
{
   { "break", 1 },
   { "continue", 1 },
   { "class", 1 },
   { "del", 1 },
   { "def", 1 },
   { "except", 1 },
   { "exec", 1 },
   { "finally", 1 },
   { "pass", 1 },
   { "print", 1 },
   { "raise", 1 },
   { "return", 1 },
   { "try", 1 },
   { "global", 1 },
   { "assert", 1 },
   { "lambda", 1 },
   { "for", 1 },
   { "while", 1 },
   { "if", 1 },
   { "elif", 1 },
   { "else", 1 },
   { "and", 1 },
   { "in", 1 },
   { "is", 1 },
   { "not", 1 },
   { "or", 1 },
   // `import' at the start of a line is caught by the longer import rule;
   // listing it here covers the `from x import y' form.
   { "from", 1 },
   { "import", 1 },
   { "as", 1 },
   { "with", 1 },
   { "yield", 1 },
   { 0, 0 }
};

%}

6.6.5
/* Scanner options: maintain yylineno, enable the start-condition stack
   (needed for yy_push_state/yy_pop_state in the string-escape rules),
   treat end of input as final instead of calling yywrap(), and generate a
   non-interactive scanner. */
%option yylineno
%option stack
%option noyywrap
%option never-interactive

/* Inclusive start conditions: rules written without a start condition stay
   active while the scanner is in one of these states. */
%s COMLINE
%s STRINGSNG
%s STRINGDBL
/* Exclusive start conditions: only rules that name the state (or use <*>)
   are active while the scanner is in one of these states. */
%x STRESC
%x NOTIDENT

%%

6.6.6

Match the start of comments.

<INITIAL>"#"     {
                     // Open the comment style, emit the hash itself and
                     // switch to COMLINE for the rest of the line.
                     s += style_start (STYLE_COMMENT);
                     // Only TeX needs the hash escaped; in HTML output the
                     // backslash would show up literally.
                     s += use_html ? "#" : "\\#";
                     BEGIN (COMLINE);
                  }

6.6.7

Match the start of string literals.

<INITIAL>\"       {
                     // Opening double quote: start the string-literal style,
                     // emit the quote and scan the body in STRINGDBL.
                     s.append (style_start (STYLE_STRING_LIT));
                     s.push_back ('\"');
                     BEGIN (STRINGDBL);
                  }
<INITIAL>'        {
                     // Opening single quote: same as above, but the body is
                     // scanned in STRINGSNG so only a `'' closes it.
                     s.append (style_start (STYLE_STRING_LIT));
                     s.push_back ('\'');
                     BEGIN (STRINGSNG);
                  }

6.6.8

Match operators which we don't do anything interesting with.

<INITIAL>(\+|-|\*|\/|%|&&|\|\||&|\||^|<<|>>)= {
                     // An operator immediately followed by `=': no special
                     // rendering, the matched text just goes through
                     // format_text().
                     s.append (format_text (yypytext, true));
                  }

6.6.9

Match operators which we display in more interesting ways.

<INITIAL>!=       {
                     // Not-equal: entity in HTML, math symbol in TeX.
                     if (use_html)
                        s += "&ne;";
                     else
                        s += "$\\ne$";
                  }
<INITIAL>==       {
                     // Equality: kept as two equals signs; TeX output sets
                     // them in math mode.
                     if (use_html)
                        s += "==";
                     else
                        s += "$==$";
                  }
<INITIAL>\>=      {
                     // Greater-or-equal.
                     if (use_html)
                        s += "&ge;";
                     else
                        s += "$\\geq$";
                  }
<INITIAL>\<=      {
                     // Less-or-equal.
                     if (use_html)
                        s += "&le;";
                     else
                        s += "$\\leq$";
                  }
<INITIAL>=        {
                     // Plain assignment is emitted unchanged.
                     s += "=";
                  }

6.6.10

Match keywords.

<INITIAL>[[:alpha:]_][[:alnum:]_]* {
                     // Look the whole identifier up in the keyword table and
                     // pick the style from the typecode that comes back.
                     const int kind = find_word (py_words, yypytext);
                     switch (kind)
                     {
                        case 1:
                           s += style_text (STYLE_KEYWORD, yypytext);
                           break;
                        case 2:
                           s += style_text (STYLE_CONSTANT, yypytext);
                           break;
                        case 3:
                           s += style_text (STYLE_TYPEWORD, yypytext);
                           break;
                        default:
                           // Not a listed word: push the text back and let
                           // the NOTIDENT rules copy it out one character at
                           // a time.
                           yyless (0);
                           BEGIN (NOTIDENT);
                           break;
                     }
                  }

6.6.11

Match the `import' statement.

<INITIAL>^import[ \t]+[^ \t\n\r\v\f]+ {
                     // The word `import' is rendered in the preprocessor
                     // style.
                     s += style_start (STYLE_PREPROC);
                     s += "import";
                     s += style_end (STYLE_PREPROC);
                     // Copy the separating whitespace through unchanged.
                     // 6 is the length of "import".  The loop's final test
                     // looks at the first byte of the module name, which may
                     // be >= 0x80, so convert to unsigned char before
                     // calling isspace() to stay within its defined domain.
                     int i = 6;
                     for (; isspace (static_cast<unsigned char> (yypytext[i])); ++i)
                        s += yypytext[i];
                     // Everything after the whitespace is the module name.
                     s += style_start (STYLE_INCLUDE_FILE);
                     s += format_text (yypytext + i, true);
                     s += style_end (STYLE_INCLUDE_FILE);
                  }

6.6.12

Process comments.

<COMLINE>\n       {
                     // A newline ends the comment.  Close the style, then
                     // push the newline back so the INITIAL rules handle it.
                     s.append (style_end (STYLE_COMMENT));
                     yyless (0);
                     BEGIN (INITIAL);
                  }
<COMLINE>(FIXME|TODO|XXX) {
                     // Marker words inside a comment get the highlight
                     // style.
                     s.append (style_start (STYLE_COMMENT_HL));
                     s.append (yypytext);
                     s.append (style_end (STYLE_COMMENT_HL));
                  }
<COMLINE>.        {
                     // Any other comment character goes through
                     // format_text().
                     string escaped;
                     format_text (yypytext[0], escaped, true);
                     s.append (escaped);
                  }
<COMLINE><<EOF>>  {
                     // Input ended inside a comment: close the style before
                     // stopping.
                     s.append (style_end (STYLE_COMMENT));
                     return 0;
                  }

6.6.13

Process string literals.

<STRINGSNG,STRINGDBL>\\ {
                     // A backslash starts an escape sequence: open the
                     // escape style, emit the backslash, and push STRESC so
                     // that the scanner returns to the current string state
                     // afterwards.
                     s.append (style_start (STYLE_STRING_ESC));
                     if (use_html)
                        s.append ("\\");
                     else
                        s.append ("$\\backslash$");
                     yy_push_state (STRESC);
                  }
<STRINGDBL>\" {
                     // Closing double quote.
                     s.push_back ('\"');
                     s.append (style_end (STYLE_STRING_LIT));
                     BEGIN (INITIAL);
                  }
<STRINGSNG>'         {
                     // Closing single quote.
                     s.push_back ('\'');
                     s.append (style_end (STYLE_STRING_LIT));
                     BEGIN (INITIAL);
                  }

6.6.14

Process escape sequences in string literals.

<STRESC>.         {
                     // The character after the backslash completes the
                     // escape: format it, close the escape style and return
                     // to the string state that pushed STRESC.
                     string se;
                     format_text (yypytext[0], se, true);
                     s += se;
                     s += style_end (STYLE_STRING_ESC);
                     yy_pop_state();
                  }
<STRESC>\n        {
                     // Backslash-newline (line continuation inside a string).
                     // STRESC is exclusive and `.' does not match a newline,
                     // so without this rule flex's default rule would echo
                     // the newline to yyout and the next line's first
                     // character would be taken as the escape.
                     s += style_end (STYLE_STRING_ESC);
                     s += '\n';
                     yy_pop_state();
                  }
<STRESC><<EOF>>   {
                     cerr << "warning: EOF in string or char literal.\n";
                     s += style_end (STYLE_STRING_ESC);
                     s += style_end (STYLE_STRING_LIT);
                     // Undo the push made when the escape started, so no
                     // stale entry is left on the start-condition stack for
                     // the next scan.
                     yy_pop_state();
                     return 0;
                  }

6.6.15

Match other random things.

<*>_              { s += use_html ? "_" : "\\_"; /* FIXME */ }
.                 {
                     // Catch-all for single characters.  It carries no start
                     // condition, so it is active in INITIAL and in every
                     // inclusive state (COMLINE, STRINGSNG, STRINGDBL).
                     string se;
                     format_text (yypytext[0], se, true);
                     s += se;
                  }
<INITIAL>\n\n     { s += use_html ? "\n\n" : "\n\\medskip\n"; }

<NOTIDENT>[[:alnum:]] { s += yypytext[0]; }
<NOTIDENT>.|\n    {
                     // First character that cannot belong to the identifier:
                     // push it back and resume normal scanning.
                     yyless (0);
                     BEGIN (INITIAL);
                  }

<INITIAL,STRINGSNG,STRINGDBL>\n { s += '\n'; }
<STRINGSNG,STRINGDBL><<EOF>> {
                     // Input ended inside an unterminated string: warn and
                     // close the string style so the emitted markup stays
                     // balanced, as the comment and escape EOF rules do.
                     cerr << "warning: EOF in string or char literal.\n";
                     s += style_end (STYLE_STRING_LIT);
                     return 0;
                  }
<INITIAL,NOTIDENT><<EOF>> {
                     return 0;
                  }

%%

6.6.16

This is the function called by `qweave' when it has something for us to process.

// Entry point used by qweave: format one piece of Python source.
//
// Records `source' as the scanner's input, empties the output buffer, runs
// the scanner from the INITIAL state and returns the formatted text.  The
// returned reference is to this file's static buffer, so its contents are
// replaced by the next call.
const string &
pretty_print_python (const string &source)
{
   // Reset the input cursor to the beginning of the new source.
   src_index = 0;
   src = &source;

   // Start from an empty buffer and a known scanner state.
   s.clear();
   BEGIN (INITIAL);
   yylex();

   return s;
}