Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members   Examples  

query.C

Go to the documentation of this file.
00001 // $Id: query.C,v 1.2 2002/03/27 11:21:21 dvermeir Exp $
00002 #include <sstream>
00003 #include <vector>
00004 #include <iterator>
00005 #include <fstream>
00006 #include <algorithm>
00007 #include "query.h"
00008 #include "wordstreamiterator.h"
00009 // Builds a query on index from the string q; throws runtime_error("SYNTAX ERROR " + q) if q yields no words or contains an unmatched double quote.
00010 Query::Query(const Index& index, const string &q) throw (runtime_error): index_(index), query_(q) {
00011 static const string syntax_error("SYNTAX ERROR "); 
00012 // First fill words_ with all words of q (quoted or not); it will be used to query the index_.
00013 istringstream iss(q);
00014 copy(wordstream_iterator(iss, index.ignore()), 
00015      wordstream_iterator(),
00016      inserter(words_, words_.end()));
00017 if (words_.size() == 0) // A query without any word is rejected.
00018   throw runtime_error(syntax_error + q);
00019 // Next detect phrases (double-quoted word sequences); these will be used to check candidate files.
00020 string::size_type pos(0); // Start of the part of q that has not been processed yet.
00021 string::size_type start(string::npos); // Position of the opening double quote.
00022 string::size_type end(string::npos); // Position of the closing double quote.
00023 while ((start = q.find('"',pos)) != string::npos) {
00024   // Got a double quote at position start.
00025   // First make 1-word phrases out of the unquoted text q[pos]..q[start-1].
00026   {
00027   string words(q,pos, start - pos);
00028   istringstream words_iss(words);
00029   wordstream_iterator i(words_iss, index.ignore());
00030   wordstream_iterator end; // End-of-stream iterator; shadows the outer end inside this block.
00031   for ( ; i!=end; ++i) {
00032     string w(*i);
00033     // Only necessary if the word is longer than the index word_size() (presumably the index stores shortened words - verify against Index).
00034     if (w.size() > index.word_size())
00035       phrases_.push_back(Phrase(1,w));
00036     }
00037   }
00038   // Find closing double quote.
00039   end = q.find('"', start+1);
00040   if (end == string::npos)  // No closing double quote.
00041     throw runtime_error(syntax_error + q);
00042   // Retrieve the phrase string between the two quotes (quotes excluded).
00043   string phrase_s(q, start + 1, end - start -1);
00044   istringstream iss(phrase_s); // Shadows the outer iss.
00045   Phrase phrase;
00046   copy(wordstream_iterator(iss, index.ignore()), 
00047        wordstream_iterator(),
00048        back_inserter(phrase));
00049   phrases_.push_back(phrase);
00050   pos = end+1; // Continue scanning right after the closing quote.
00051   }
00052 // Make 1-word phrases out of the remaining unquoted tail q[pos..] of the string.
00053 {
00054 string words(q,pos);
00055 istringstream words_iss(words);
00056 wordstream_iterator i(words_iss, index.ignore());
00057 wordstream_iterator end; // End-of-stream iterator; shadows the outer end inside this block.
00058 for ( ; i!=end; ++i) {
00059   string w(*i);
00060   // Only necessary if the word is longer than the index word_size() (same rule as for the text before a quote).
00061   if (w.size() > index.word_size())
00062     phrases_.push_back(Phrase(1,w));
00063   }
00064 }
00065 }
00066 // Executes the query: adds to result_ every candidate file from the index in which all phrases occur, and returns result_.
00067 const Index::Files&
00068 Query::exec() {
00069 // 1. Retrieve list of files containing all (short versions of) words.
00070 Index::Files    fset; // Candidate files; iterated below after index_.query() has been called on it.
00071 index_.query(words_, fset);
00072 // 2. For each file in fset, check if all phrases occur. If so,
00073 //    add it to result.
00074 for (Index::Files::iterator f=fset.begin(); f!=fset.end(); ++f) {
00075   // Grab a copy of the file in a vector. This avoids repeatedly
00076   // reading the file, hopefully improving performance.
00077   vector<string> fseq;
00078   ifstream ifs((**f).c_str()); // **f is used as the name of the file to open.
00079   if (!ifs)
00080     continue; // File cannot be opened: silently skip it and go to the next file.
00081   copy(wordstream_iterator(ifs, index_.ignore()), 
00082        wordstream_iterator(),
00083        back_inserter(fseq));
00084   // Check that each phrase appears in fseq as a run of consecutive words.
00085   bool ok(true);
00086   for (list<Query::Phrase>::const_iterator p = phrases_.begin(); 
00087        ok && p!=phrases_.end(); // Stop at the first phrase that is missing.
00088        ++p) {
00089     ok = (search(fseq.begin(),fseq.end(),p->begin(),p->end()) != fseq.end());
00090     }
00091   if (ok)
00092     result_.insert(*f); // NOTE(review): result_ is not cleared in this function, so earlier contents are kept - confirm this is intended.
00093   }
00094 return result_;
00095 }
00096 // Writes a readable dump of the words and phrases of q to os and returns os.
00097 ostream&
00098 operator<<(ostream& os, const Query& q) {
00099 os << "q.words = ";
00100 copy(q.words_.begin(), q.words_.end(), ostream_iterator<string>(os," ")); // Each word is followed by a space.
00101 os << "\n";
00102 os << "q.phrases = " << endl; // endl also flushes os.
00103 for (list<Query::Phrase>::const_iterator i=q.phrases_.begin(); 
00104      i!=q.phrases_.end(); ++i) {
00105   os << " \""; // One phrase per line, enclosed in double quotes.
00106   copy((*i).begin(), (*i).end(), ostream_iterator<string>(os," ")); // Each word is followed by a space, also the last one.
00107   os << "\"\n";
00108   }
00109 return os;
00110 }
00111 

textindexer-0.2 [27 March, 2002]