#include <index.h>
Public Types | |
typedef set< string > | Strings |
typedef set< const string *> | Files |
typedef map< string, Files > | Map |
typedef map< const string *, time_t > | Dates |
typedef Map::const_iterator | iterator |
Public Methods | |
Index (size_t word_size, istream *is=0) | |
Constructor. More... | |
~Index () | |
const string * | file (const string &fn) const |
Return pool reference for filename or 0 if not in pool. More... | |
time_t | date (const string &fn) const |
Return insert date for file. More... | |
const Files * | query (const string &) const |
Consult index. More... | |
Files & | query (const Strings &q, Files &r) const |
Consult index. More... | |
size_t | insert (const string &fn, istream &is) |
Insert a range of words from filename into the index. More... | |
const Strings & | ignore () const |
void | ignore (const string &w) |
Put a word on the ignore list. More... | |
void | ignore (istream &is) |
Put all words from stream on the ignore list. More... | |
void | remove (const string &fn) |
Remove all words associated with filename from the index. More... | |
StringPool & | pool () |
const StringPool & | pool () const |
iterator | begin () const |
iterator | end () const |
size_t | word_size () const |
size_t | size () const |
Friends | |
ostream & | operator<< (ostream &os, const Index &index) |
Write the index to a text file. More... | |
istream & | operator>> (istream &os, Index &index) throw (runtime_error) |
Read the index from a text file. More... |
In addition, an index keeps a set of words to be ignored and it remembers the time of the last index operation on a filename.
Note that the class does not depend on files, only on streams. This is useful because the class could be reused in other settings, e.g. by replacing filenames with URLs.
Definition at line 16 of file index.h.
|
Definition at line 18 of file index.h. Referenced by file(), ignore(), Query::result(), and size(). |
|
Definition at line 19 of file index.h. Referenced by date(), Query::exec(), file(), operator<<(), Query::Query(), query(), remove(), Query::result(), Server::serve(), and Query::~Query(). |
|
Definition at line 20 of file index.h. Referenced by size(). |
|
Definition at line 21 of file index.h. Referenced by size(). |
|
Definition at line 22 of file index.h. Referenced by begin(), end(), and operator<<(). |
|
Constructor.
Definition at line 13 of file index.C. References ignore().
00013 : word_size_(word_sz) { 00014 if (is) 00015 ignore(*is); 00016 } |
|
Definition at line 18 of file index.C.
00018 { 00019 } |
|
Return pool reference for filename or 0 if not in pool.
Definition at line 38 of file index.h. References Files, and Strings. Referenced by date(), insert(), and remove().
00038 { return pool_(fn); } |
|
Return insert date for file.
Definition at line 93 of file index.C.
00093 { 00094 const string* pfn(file(fn)); 00095 if (!pfn) 00096 return 0; 00097 Dates::const_iterator i = dates_.find(pfn); 00098 if (i==dates_.end()) 00099 return 0; 00100 return (*i).second; 00101 } |
|
Consult index.
Definition at line 104 of file index.C. References Files, and word_size(). Referenced by Query::exec(), and query().
00104 { 00105 string ss(s,0,word_size()); 00106 Map::const_iterator i(map_.find(ss)); 00107 if (i==map_.end()) 00108 return 0; 00109 return &(*i).second; 00110 } |
|
Consult index.
Definition at line 114 of file index.C. References begin(), end(), Files, and query().
00114 { 00115 Files result[2]; 00116 int r(0); // index of result 00117 for (Strings::const_iterator i = q.begin(); i!=q.end(); ++i) { 00118 const Index::Files* qr(query(*i)); 00119 if ( qr == 0) { // one of the strings not found: just return empty set 00120 final_result.clear(); 00121 return final_result; 00122 } 00123 // assert qr != 0 00124 int nr((r+1)%2); // index of next result 00125 if (i==q.begin()) // first result, just store in result[nr] 00126 result[nr] = *qr; 00127 else { 00128 result[nr].clear(); 00129 set_intersection(result[r].begin(), result[r].end(), 00130 qr->begin(), qr->end(), 00131 inserter(result[nr],result[nr].end())); 00132 } 00133 r = nr; 00134 } 00135 final_result = result[r]; 00136 return final_result; 00137 } |
|
Insert a range of words from filename into the index.
Definition at line 140 of file index.C. References end(), file(), ignore(), remove(), and word_size(). Referenced by Server::serve().
00140 { 00141 const string* pfn(file(fn)); 00142 if (pfn) 00143 remove(fn); 00144 pfn = pool_[fn]; 00145 size_t n(0); 00146 wordstream_iterator i(is,ignore()); 00147 wordstream_iterator end; 00148 for (; i!=end; ++i) { 00149 string w(*i,0,word_size()); 00150 n += (map_[w].insert(pfn).second ? 1 : 0); 00151 } 00152 dates_[pfn] = Dv::Util::Date().time(); 00153 return n; 00154 } |
|
Definition at line 71 of file index.h. References Strings. Referenced by Query::exec(), ignore(), Index(), and insert().
00071 { return ignore_; } |
|
Put a word on the ignore list.
Definition at line 172 of file index.C.
00172 { 00173 ignore_.insert(w); 00174 } |
|
Put all words from stream on the ignore list.
Definition at line 177 of file index.C. References ignore().
|
|
Remove all words associated with filename from the index.
Definition at line 157 of file index.C. References file(), Files, and StringPool::remove(). Referenced by insert().
00157 { 00158 const string* pfn(file(fn)); 00159 if (pfn == 0) 00160 return; 00161 for (Map::iterator i = map_.begin(); i!=map_.end(); ++i) { 00162 Files& files((*i).second); 00163 files.erase(pfn); 00164 if (files.size() == 0) 00165 map_.erase(i); 00166 } 00167 dates_.erase(pfn); 00168 pool_.remove(fn); 00169 } |
|
Definition at line 109 of file index.h.
00109 { return pool_; } |
|
Definition at line 110 of file index.h.
00110 { return pool_; } |
|
Definition at line 112 of file index.h. References iterator. Referenced by operator<<(), and query().
00112 { return map_.begin(); } |
|
Definition at line 113 of file index.h. References iterator. Referenced by insert(), operator<<(), and query().
00113 { return map_.end(); } |
|
Definition at line 114 of file index.h. Referenced by insert(), and query().
00114 { return word_size_; } |
|
Definition at line 115 of file index.h. References Dates, Map, and Strings.
00115 { return map_.size(); } |
|
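Write the index to a text file. Format:
    #textindexer VERSION -- do not edit this file
    time_t filename
    ...
    empty line
    word fileid ..
Note: the listing below writes the number of indexed files on the line after the header and does not emit an empty separator line before the word entries.
Definition at line 24 of file index.C.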
00024 { 00025 // Write first line: MAGIC version# 00026 os << MAGIC << " " << VERSION << " -- do not edit this file" << endl; 00027 // Write pool contents & remember the position of each pointer. 00028 os << index.dates_.size() << endl; 00029 map<const string*,int> file_nrs; 00030 size_t n(0); 00031 for (Index::Dates::const_iterator i = index.dates_.begin(); 00032 i!=index.dates_.end(); ++i) { 00033 os << (*i).second << ' ' << *((*i).first) << "\n"; 00034 file_nrs[(*i).first] = n++; 00035 } 00036 for (Index::iterator i=index.begin(); i!= index.end(); ++i) { 00037 os << (*i).first; 00038 const Index::Files& files((*i).second); 00039 for (Index::Files::const_iterator j = files.begin(); j!=files.end(); ++j) 00040 os << " " << file_nrs[*j]; 00041 os << "\n"; 00042 } 00043 return os; 00044 } |
|
Read the index from a text file.
Definition at line 47 of file index.C.
00047 { 00048 // Read first line: MAGIC version junk 00049 // Normally, we would use ignore(numeric_limits<int>::max(), '\n') 00050 // but #include <limits> does not work in this g++ version. 00051 string magic; 00052 string junk; 00053 double version; 00054 is >> magic >> version; 00055 getline(is,junk); 00056 if (magic!=MAGIC) 00057 throw runtime_error("Bad magic in textindexer index file"); 00058 // Perhaps a test on version should come here. 00059 vector<const string*> files; 00060 StringPool& pool(index.pool()); 00061 size_t n; 00062 is >> n; 00063 string filename; 00064 time_t t; 00065 for (size_t i=0; i<n; ++i) { 00066 is >> t; 00067 if (t==0) 00068 throw runtime_error("Index reader: time == 0"); 00069 is.ignore(); 00070 getline(is, filename); 00071 if (filename.size()==0) 00072 throw runtime_error("Index reader: empty filename "); 00073 const string* pfn(pool[filename]); 00074 files.push_back(pfn); 00075 index.dates_[pfn] = t; 00076 } 00077 string line; 00078 while (getline(is,line)) { 00079 istringstream iss(line); 00080 Word w; 00081 iss >> w; 00082 size_t i; 00083 while (iss>>i) { 00084 if (i>=files.size()) 00085 throw runtime_error(Dv::Util::tostring(i) + ": illegal file#"); 00086 index.map_[w.str()].insert(files[i]); 00087 } 00088 } 00089 return is; 00090 } |
textindexer-0.2 | [27 March, 2002] |