2.5. index.cc


2.5.1

Functions for doing indexing related things.

#include "doc.hh"
#include <cctype>
using namespace libdoc;

// Iterator types for a hash_map from C-string keys to int values.
// NOTE(review): presumably this is the type of DocumentSender::words (it is
// iterated with HMCI in send_document), and eqstr presumably compares keys
// by string content rather than by pointer -- confirm against doc.hh.
typedef hash_map <const char *, int, hash <const char *>, eqstr>::iterator HMI;
typedef hash_map <const char *, int, hash <const char *>,
                  eqstr>::const_iterator HMCI;

2.5.2
// Create a sender for the document at `filename`, in format `frm`, that
// writes to the socket descriptor `sock`.  The descriptor is wrapped in a
// write-only stdio stream which is stored in `dest`.
//
// Throws ExceptionSystemError if the descriptor cannot be wrapped.
DocumentSender::DocumentSender (const string &filename, int sock,
                                const string &frm)
   : path (filename), format (frm), sent (false)
{
   FILE *stream = fdopen (sock, "w");

   // fdopen reports failure by returning a null pointer.
   if (!stream)
      throw ExceptionSystemError ("can't fdopen the socket to docd");

   dest = stream;
}

2.5.3
// Send the document if that has not happened yet, then release the copies
// of the word strings that add_word allocated as keys of `words`.
DocumentSender::~DocumentSender ()
{
   send_document();

   // fclose is commented out below (presumably because it would also close
   // the caller's socket -- TODO confirm), so nothing else visible here
   // pushes the buffered output through.  Flush explicitly so the document
   // is not left sitting in the stdio buffer.
   fflush (dest);

   while (words.begin() != words.end())
   {
      // Take the key out of the table before freeing it, so the table never
      // holds a dangling pointer.
      const char *s = words.begin()->first;
      words.erase (words.begin());
      // The keys are allocated with new char[] in add_word, so they must be
      // released with the array form of delete.
      delete[] s;
   }
//   for (HMI i = words.begin(); i != words.end(); ++i)
//      fprintf (stderr, "-> %s\n", i->first);

//   fclose (dest);
}

2.5.4

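Record the information about a document (its title, date and manual) that will be sent to the daemon. Nothing is transmitted here; the values are stored and go out as part of the document header when the document is sent. That header also tells the daemon to expect further information about the document (a list of the words it contains).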

// Store the header fields for this document.  Nothing is written to the
// daemon here; send_document emits these values as part of the header.
//
//   thetitle  document title
//   thedate   document date
//   theman    name of the manual the document belongs to
void
DocumentSender::set_header_info (const string &thetitle,
                                 const string &thedate,
                                 const string &theman)
{
   title  = thetitle;
   date   = thedate;
   manual = theman;
}

2.5.5
// Queue `word` for this document.  The word is lower-cased first; if the
// lower-cased form is not yet in `words`, a heap copy of it is inserted as a
// key with the value 1.  Words that are already present are left untouched,
// so occurrence counts are not tracked.
//
// The key copy is allocated with new char[] and is released in the
// destructor.
void
DocumentSender::add_word (const string &word)
{
   string w (word.size(), ' ');
   for (size_t i = 0; i < word.size(); ++i)
   {
      // tolower has undefined behaviour for negative arguments other than
      // EOF, so go through unsigned char to be safe with bytes >= 0x80.
      w[i] = static_cast<char> (tolower (static_cast<unsigned char> (word[i])));
   }

   if (words.find (w.c_str()) == words.end())
   {
      // The table stores bare pointers, so it needs storage that outlives
      // the local string.
      char *cstr = new char[w.size() + 1];
      strcpy (cstr, w.c_str());
      words[cstr] = 1;
   }
}

2.5.6

Send the document to the daemon, unless that has already been done. The start of the document (its header) is output first, followed by any queued words, and finally a line containing "e" to indicate that there is no more information for this document.

void
DocumentSender::send_document ()
{
   if (sent)
      return;

   fprintf (dest, "d%s\n%s\n%s\n%s\n%s\n", path.c_str(), title.c_str(),
            date.c_str(), manual.c_str(), format.c_str());

2.5.7

If any words are queued to be transmitted we deal with them now, since now we are ready to do so.

   sent = true;

   // This currently ignores the number of occurances of each word.
   for (HMCI i = words.begin(); i ≠ words.end(); ++i)
      send_word (i->first);

   fputs ("e\n", dest);
}

2.5.8

Either send the word to the daemon or print it out, or put it on the queue if we are not ready to deal with it yet. Words on the stoplist are ignored and all words are tidied up by removing trailing punctuation and making them lowercase (unless a word is all caps, which might be significant).

// Write one word record ("w<word>\n") to `dest`.  Trailing non-alphabetic
// characters are stripped first; if nothing is left, nothing is written.
//
// Stoplist lookup and duplicate removal are not implemented here (see the
// TODO below).  Lower-casing happens in add_word, not in this function.
void
DocumentSender::send_word (const string &word)
{
   string w = word;

   // isalpha has undefined behaviour for negative arguments other than EOF,
   // so go through unsigned char to be safe with bytes >= 0x80.
   while (!w.empty()
          && !isalpha (static_cast<unsigned char> (w[w.size() - 1])))
      w.resize (w.size() - 1);

   // TODO: look up on stoplist and remove duplicates.

   if (!w.empty())
      fprintf (dest, "w%s\n", w.c_str());
}