6.5. qpretty.c++.l
A pretty printer for C++.
%{
#include <iostream>
#include <string>
#include <cstring>
#include "qweave.hh"
// File-scope state shared between pretty_print_cxx() and the scanner rules.
// NOTE(review): nothing in the visible part of this file reads src or
// src_index; presumably an input hook defined elsewhere consumes them -- confirm.
static const string *src; // Points to the raw source code; set by pretty_print_cxx().
static unsigned int src_index; // Index into the raw source; reset to 0 by pretty_print_cxx().
static string s; // The formatted output (TeX, or HTML when use_html is set), built up by the rules.
A list of C++ keywords (plus a few common library names), each with a code indicating how the word is styled: 1 for keywords, 2 for constants, 3 for type words.
// Word table consulted by the identifier rule through find_word().  The
// second field selects the style that rule applies to the word:
//   1 -> STYLE_KEYWORD, 2 -> STYLE_CONSTANT, 3 -> STYLE_TYPEWORD.
// Besides language keywords it lists some library names (FILE, size_t,
// EOF, M_PI, NPOS, NULL, PI) so that they are highlighted as well.
// NOTE(review): the entries are not in strict sort order, so find_word()
// presumably scans linearly up to the { 0, 0 } terminator -- confirm before
// relying on any ordering.
static Word cpp_words[] =
{
{ "auto", 3 },
{ "bool", 3 },
{ "break", 1 },
{ "case", 1 },
{ "catch", 1 },
{ "char", 3 },
{ "class", 3 },
{ "const", 3 },
{ "const_cast", 1 },
{ "default", 1 },
{ "delete", 1 },
{ "double", 3 },
{ "dynamic_cast", 1 },
{ "else", 1 },
{ "enum", 3 },
{ "explicit", 3 },
{ "export", 1 }, // XXX?
{ "float", 3 },
{ "FILE", 3 },
{ "friend", 3 },
{ "goto", 1 },
{ "if", 1 },
{ "inline", 3 },
{ "int", 3 },
{ "long", 3 },
{ "mutable", 3 },
{ "namespace", 3 },
{ "new", 1 },
{ "operator", 1 },
{ "private", 1 },
{ "protected", 1 },
{ "public", 1 },
{ "register", 3 },
{ "reinterpret_cast", 1 },
{ "return", 1 },
{ "short", 3 },
{ "signed", 3 },
{ "size_t", 3 },
{ "sizeof", 1 },
{ "static", 3 },
{ "static_cast", 1 },
{ "struct", 3 },
{ "switch", 1 },
{ "template", 3 },
{ "this", 1 },
{ "throw", 1 },
{ "try", 1 },
{ "typeid", 1 },
{ "typename", 3 },
{ "unsigned", 3 },
{ "using", 1 },
{ "virtual", 3 },
{ "void", 3 },
{ "volatile", 3 },
{ "while", 1 },
{ "wchar_t", 3 },
{ "EOF", 2 },
{ "M_PI", 2 },
{ "NPOS", 2 },
{ "NULL", 2 },
{ "PI", 2 },
{ "true", 2 },
{ "false", 2 },
{ 0, 0 }
};
void
add_include (const string &include, bool system_one)
{
// Add the `#include' bit and a space.
s += style_start (STYLE_PREPROC);
string se;
format_text ('#', se, true);
s += se;
s += "include";
s += style_end (STYLE_PREPROC);
s += ' ';
// Work out what the name of the file is.
string::size_type before = include.find (system_one ? '<' : '\"');
string::size_type after = include.find (system_one ? '>' : '\"', ++before);
string filename = include.substr (before, after - before);
// Search for the filename if its a local file.
QefDocFile *qfile = find_file_from_index (filename);
// Add the name of the file with the appropriate quoting.
s += style_start (STYLE_INCLUDE_FILE);
format_text (system_one ? '<' : '\"', se, true);
s += se;
if (use_html && qfile)
{
char buf[128];
if (option_split)
sprintf (buf, "<a href=\"%s.html\">",
qfile->preproc_filename.c_str());
else
sprintf (buf, "<a href=\"#F%d\">", qfile->filenum + 1);
s += buf;
}
s += format_text (filename, true);
if (use_html && qfile)
s += "</a>";
format_text (system_one ? '>' : '\"', se, true);
s += se;
s += style_end (STYLE_INCLUDE_FILE);
}
%}
%option yylineno
%option stack
%option noyywrap
%option never-interactive
%s COMLINE
%s COMMULT
%s STRING
%s CHAR
%x STRESC
%x NOTIDENT
%%
Things which start comments.
<INITIAL>"//" {
  /* Start of a one-line comment: open the comment style, copy the
     two-character marker and scan the rest in COMLINE. */
  BEGIN (COMLINE);
  s += style_start (STYLE_COMMENT);
  s += yycxxtext;
}
<INITIAL>"/*" {
  /* Start of a C-style comment: same as above, but continue in COMMULT. */
  BEGIN (COMMULT);
  s += style_start (STYLE_COMMENT);
  s += yycxxtext;
}
The start of a string or character literal.
<INITIAL>\" {
  /* Opening double quote: open the literal style, copy the quote and
     scan the contents in STRING. */
  BEGIN (STRING);
  s += style_start (STYLE_STRING_LIT);
  s += yycxxtext[0];
}
<INITIAL>' {
  /* Opening single quote: as above, but scan the contents in CHAR. */
  BEGIN (CHAR);
  s += style_start (STYLE_STRING_LIT);
  s += yycxxtext[0];
}
Operators which we print verbatim.
<INITIAL>(\+|-|\*|\/|%|&&|\|\||&|\||^|<<|>>)= {
  /* An operator immediately followed by an equals sign is emitted as
     ordinary formatted text, so that the equals sign is not picked up
     by the single-character rule further down. */
  const string verbatim = format_text (yycxxtext, true);
  s += verbatim;
}
Operators which we can output in a better way than just as plain text.
<INITIAL>!= { /* HTML uses character entity references so the output does not depend on the encoding of this file. */ s += use_html ? "&ne;" : "$\\ne$"; }
<INITIAL>== { /* equality test, kept as two equals signs */ s += use_html ? "==" : "$==$"; }
<INITIAL>\>= { /* greater-or-equal sign */ s += use_html ? "&ge;" : "$\\geq$"; }
<INITIAL>\<= { /* less-or-equal sign */ s += use_html ? "&le;" : "$\\leq$"; }
<INITIAL>-> { /* member access through a pointer; the angle bracket is escaped for HTML */ s += use_html ? "-&gt;" : "$\\rightarrow$"; }
<INITIAL>= { /* plain assignment is left as it is */ s += "=" /*"$\\leftarrow$"*/; }
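Keywords and other identifiers. Words found in the table are styled according to their code; any other identifier is handed to the NOTIDENT state.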
<INITIAL>[[:alpha:]_][[:alnum:]_]* {
  /* Look the identifier up in cpp_words and style it according to the
     code found there. */
  int kind = find_word (cpp_words, yycxxtext);
  string formatted = format_text (yycxxtext, true);
  switch (kind)
    {
    case 1:
      s += style_text (STYLE_KEYWORD, formatted);
      break;
    case 2:
      s += style_text (STYLE_CONSTANT, formatted);
      break;
    case 3:
      s += style_text (STYLE_TYPEWORD, formatted);
      break;
    default:
      /* Not in the table: push the whole match back and let the
         NOTIDENT rules consume it. */
      yyless (0);
      BEGIN (NOTIDENT);
      break;
    }
}
Preprocessor directives.
<INITIAL>^\#include[ \t]+<[^\n>]+> {
  /* System include: the file name is delimited by angle brackets. */
  add_include (yycxxtext, true);
}
<INITIAL>^\#include[ \t]+\"[^\n\"]+\" {
  /* Local include: the file name is delimited by double quotes.  The
     name must stop at the first closing quote; the previous character
     class excluded the closing angle bracket instead, so the match ran
     on to the last quote on the line and the text after the file name
     was lost from the output. */
  add_include (yycxxtext, false);
}
<INITIAL>^\#[[:alpha:]]+ {
  /* Any other directive at the start of a line: the hash sign goes
     through format_text, the directive name is copied as it is, and
     both are wrapped in the preprocessor style. */
  s += style_start (STYLE_PREPROC);
  string se;
  format_text ('#', se, true);
  s += se;
  s += (yycxxtext + 1);
  s += style_end (STYLE_PREPROC);
}
The insides of comments, both one-liners and C-style ones.
<COMLINE>\n {
/* A newline ends a one-line comment.  The newline itself is pushed back
   with yyless so that the INITIAL rules get to handle it. */
s += style_end (STYLE_COMMENT);
yyless (0);
BEGIN (INITIAL);
}
<COMMULT>"*/" {
/* The closing marker ends a C-style comment; it is copied before the
   comment style is closed. */
s += "*/";
s += style_end (STYLE_COMMENT);
BEGIN (INITIAL);
}
<COMLINE,COMMULT>(FIXME|TODO|XXX) {
/* Marker words inside either kind of comment get their own highlight
   style; the matched word is copied unchanged. */
s += style_start (STYLE_COMMENT_HL);
s += yycxxtext;
s += style_end (STYLE_COMMENT_HL);
}
<COMLINE,COMMULT>. {
/* Any other single character of a comment (not a newline) goes through
   format_text. */
string se;
format_text (yycxxtext[0], se, true);
s += se;
}
<COMLINE,COMMULT><<EOF>> {
/* Input ended inside a comment: close the comment style and stop. */
s += style_end (STYLE_COMMENT);
return 0;
}
Escape codes in string and character literals.
<STRING,CHAR>\\ {
  /* A backslash inside a literal starts an escape code: open the escape
     style, emit the backslash and handle the next character in STRESC.
     The current state is pushed so that STRESC can return to it. */
  s += style_start (STYLE_STRING_ESC);
  if (use_html)
    s += "\\";
  else
    s += "$\\backslash$";
  yy_push_state (STRESC);
}
The ends of a string or character literal.
<STRING>\" {
  /* Closing double quote: copy it, close the literal style and go back
     to ordinary code. */
  s += yycxxtext[0];
  s += style_end (STYLE_STRING_LIT);
  BEGIN (INITIAL);
}
<CHAR>' {
  /* Closing single quote: same treatment as for strings. */
  s += yycxxtext[0];
  s += style_end (STYLE_STRING_LIT);
  BEGIN (INITIAL);
}
More stuff for backslash escape codes.
<STRESC>. {
  /* The character following the backslash: format it, close the escape
     style and return to the state that was pushed when the backslash
     was seen. */
  string se;
  format_text (yycxxtext[0], se, true);
  s += se;
  s += style_end (STYLE_STRING_ESC);
  yy_pop_state();
}
<STRESC>\n {
  /* A backslash directly followed by a newline.  STRESC is an exclusive
     state and the dot rule above does not match a newline, so without
     this rule the newline fell through to the default rule of flex
     (echoed to yyout instead of being added to the output) and the
     scanner stayed in STRESC.  The newline is copied in the same way as
     the other newline rule does. */
  s += '\n';
  s += style_end (STYLE_STRING_ESC);
  yy_pop_state();
}
<STRESC><<EOF>> {
  /* Input ended right after a backslash: warn, close both open styles
     and pop the pushed state so the state stack is empty again for the
     next run of the scanner. */
  cerr << "warning: EOF in string or char literal.\n";
  s += style_end (STYLE_STRING_ESC);
  s += style_end (STYLE_STRING_LIT);
  yy_pop_state();
  return 0;
}
Rules to catch everything else.
<*>_ { /* underscore needs escaping in TeX only */ s += use_html ? "_" : "\\_"; /* FIXME */ }
. {
  /* Any single character not claimed by an earlier rule.  A rule without
     a start condition is active in INITIAL and in every inclusive state,
     so this also covers the contents of string and character literals. */
  string se;
  format_text (yycxxtext[0], se, true);
  s += se;
}
<INITIAL>\n\n { /* a blank line becomes vertical space in TeX */ s += use_html ? "\n\n" : "\n\\medskip\n"; }
<NOTIDENT>[[:alnum:]] { /* characters of an unknown identifier are copied as they are */ s += yycxxtext[0]; }
<NOTIDENT>.|\n {
  /* First character that cannot be part of the identifier: push it back
     and resume normal scanning. */
  yyless (0);
  BEGIN (INITIAL);
}
<INITIAL,STRING,CHAR,COMMULT>\n { /* newlines are copied unchanged */ s += '\n'; }
<STRING,CHAR><<EOF>> {
  /* Input ended inside a literal.  Close the literal style, as the end
     of file rules for comments and escape codes do for theirs; it used
     to be left open. */
  cerr << "warning: EOF in string or char literal.\n";
  s += style_end (STYLE_STRING_LIT);
  return 0;
}
<INITIAL,NOTIDENT><<EOF>> {
  /* Normal end of input. */
  return 0;
}
%%
This is the function called by qweave when a file needs processing.
// Entry point used by qweave: run the scanner over `source' and hand back
// the formatted text.
//
// The result is a reference to the file-scope buffer `s', so it is
// overwritten by the next call.
const string &
pretty_print_cxx (const string &source)
{
  // Reset all scanner state left over from a previous run.
  BEGIN (INITIAL);
  s.erase ();
  src_index = 0;
  src = &source;

  // The rules append to `s' as they match.
  yylex();

  return s;
}