2.5. index.cc
Functions for doing indexing related things.
#include "doc.hh"
#include <cctype>
using namespace libdoc;
// Iterator shorthands for a hash_map that is keyed on C-string pointers and
// maps to int, with eqstr supplied as the key-equality functor.
// HMI is the mutable iterator for that table type.
typedef hash_map <const char *, int, hash <const char *>, eqstr>::iterator HMI;
// HMCI is the read-only iterator; send_document uses it to walk `words`.
typedef hash_map <const char *, int, hash <const char *>,
eqstr>::const_iterator HMCI;
// Set up a sender for the document at `filename` in format `frm`, writing
// to the already-open descriptor `sock` through a stdio stream.
// Throws ExceptionSystemError if the descriptor cannot be wrapped.
DocumentSender::DocumentSender (const string &filename, int sock,
                                const string &frm)
  : path (filename), format (frm), sent (false)
{
  // Open the stream into a local first so the failure check reads as a
  // guard; `dest` is only assigned once we know the stream is usable.
  FILE *const stream = fdopen (sock, "w");
  if (stream == NULL)
    throw ExceptionSystemError ("can't fdopen the socket to docd");
  dest = stream;
}
// Send the document if that has not happened yet, then release the C
// strings that add_word allocated to serve as keys of `words`.
// NOTE(review): `dest` is neither flushed nor closed here -- confirm that
// the stream is flushed/closed somewhere outside this view.
DocumentSender::~DocumentSender ()
{
  send_document();
  while (!words.empty())
    {
      // Remember the key pointer and drop the entry before freeing it, so
      // the table never holds a pointer to released storage.
      const char *s = words.begin()->first;
      words.erase (words.begin());
      // The keys come from `new char[...]` in add_word, so the array form
      // of delete is required; scalar delete here is undefined behaviour.
      delete[] s;
    }
}
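Record the header information about a document (its title, date and manual).
Nothing is sent to the daemon at this point; send_document transmits the
header later, which also tells the daemon to expect further information about
the document (a list of the words it contains).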
void
DocumentSender::set_header_info (const string &thetitle,
const string &thedate, const string &theman)
{
title = thetitle;
date = thedate;
manual = theman;
}
// Queue `word` for transmission. The word is lowercased first and is only
// stored if the lowercased form is not already in `words`, so each distinct
// word is kept once. The stored key is a heap-allocated C string
// (new char[]) that the destructor releases.
void
DocumentSender::add_word (const string &word)
{
  string w (word.size(), ' ');
  for (size_t i = 0; i < word.size(); ++i)
    {
      // tolower is only defined for EOF and values representable as
      // unsigned char; a plain char may be negative for bytes >= 0x80.
      const unsigned char ch = static_cast<unsigned char> (word[i]);
      w[i] = static_cast<char> (tolower (ch));
    }
  if (words.find (w.c_str()) == words.end())
    {
      // The table stores only the pointer, so the key needs storage that
      // outlives the local string `w`.
      char *cstr = new char[w.size() + 1];
      strcpy (cstr, w.c_str());
      words[cstr] = 1;
    }
}
Send the whole document to the daemon, unless that has already been done:
first the header lines, then every queued word, and finally an "e" line to
indicate that there is no more information for this document.
void
DocumentSender::send_document ()
{
if (sent)
return;
fprintf (dest, "d%s\n%s\n%s\n%s\n%s\n", path.c_str(), title.c_str(),
date.c_str(), manual.c_str(), format.c_str());
If any words are queued to be transmitted we deal with them now, since now
we are ready to do so.
sent = true;
for (HMCI i = words.begin(); i ≠ words.end(); ++i)
send_word (i->first);
fputs ("e\n", dest);
}
Either send the word to the daemon or print it out, or put it on the queue
if we are not ready to deal with it yet. Words on the stoplist are ignored
and all words are tidied up by removing trailing punctuation and making
them lowercase (unless a word is all caps, which might be significant).
// Write one word to `dest` as a line of the form "w<word>". Trailing
// characters that are not alphabetic are stripped first; if nothing is
// left after stripping, nothing is written.
void
DocumentSender::send_word (const string &word)
{
  string w = word;
  // isalpha is only defined for EOF and values representable as unsigned
  // char, so convert before the call (plain char may be negative).
  while (!w.empty()
         && !isalpha (static_cast<unsigned char> (w[w.size() - 1])))
    w.resize (w.size() - 1);
  if (!w.empty())
    fprintf (dest, "w%s\n", w.c_str());
}