#include <logrecord.h>
Public Methods | |
LogRecord () | |
bool | parse_line (const string &line) |
Fill in the date_, domain_ and path_ components from one line of a log file. | |
const Date& | date () const |
const vector<string>& | path () const |
const vector<string>& | domain () const |
Private Methods | |
bool | parse_date (const string &line) |
Auxiliary function for parse_line(). | |
bool | parse_path (const string &line) |
Auxiliary function for parse_line(). | |
bool | parse_domain (const string &line) |
Auxiliary function for parse_line(). | |
Private Attributes | |
Date | date_ |
vector<string> | path_ |
vector<string> | domain_ |
Definition at line 9 of file logrecord.h.
|
Definition at line 11 of file logrecord.h. 00011 {} |
|
Fill in the date_, domain_ and path_ components from one line of a log file.
Definition at line 10 of file logrecord.C. Referenced by main().
00010 { 00011 /* Clear current vector<string> data members. This is 00012 important because parsing will append to path_ and domain_. 00013 */ 00014 path_.clear(); 00015 domain_.clear(); 00016 /* A parse succeeds if all components can be parsed. 00017 This should be modified so that we only parse what is necessary, 00018 according to the configuration. But for now, we are too lazy. 00019 */ 00020 return parse_date(line) && parse_path(line) && parse_domain(line); 00021 } |
|
Definition at line 16 of file logrecord.h. Referenced by Stats::add(), and parse_date().
00016 { /* Read-only accessor: hands back the stored date_ member. */ return date_; } |
|
Definition at line 17 of file logrecord.h. Referenced by Stats::add().
00017 { /* Read-only accessor: hands back the stored path_ components. */ return path_; } |
|
Definition at line 18 of file logrecord.h. Referenced by Stats::add().
00018 { /* Read-only accessor: hands back the stored domain_ components. */ return domain_; } |
|
Auxiliary function for parse_line().
Definition at line 25 of file logrecord.C. Referenced by parse_line().
00025 { 00026 /* Select date part: just after first '[' up to first folloing ' ' 00027 e.g. 00028 00029 spider1.tiscalinet.be - - [02/Nov/2000:10:20:36 +0100] .. 00030 00031 will select 00032 00033 02/Nov/2000:10:20:36 00034 */ 00035 string::size_type date_start(line.find_first_of('[')); 00036 string::size_type date_end(line.find_first_of(' ',date_start)); 00037 string date_string(line,date_start+1,date_end - date_start-1); 00038 00039 /* Make date_string acceptable to the Date::Date(const char*) parser. 00040 00041 1. replace '/' by '-'. In the example, this will yield 00042 00043 02-Nov-2000:10:20:36 00044 */ 00045 for (string::size_type i=0; i<date_string.size(); ++i) 00046 if (date_string[i]=='/') 00047 date_string[i] = '-'; 00048 /* 00049 2. Replace first ':' by space. In the example, this will yield 00050 00051 02-Nov-2000 10:20:36 00052 */ 00053 date_string.replace(date_string.find_first_of(':'),1," "); 00054 00055 /* Now parse using Date::Date(const string&). It will throw an exception 00056 if the date cannot be parsed. In that case, we catch the exception 00057 and return false. 00058 */ 00059 try { 00060 Date date(date_string); 00061 date_ = date; 00062 // We are only interested in hours, not minutes or seconds. 00063 date_.minutes(0); 00064 date_.seconds(0); 00065 } 00066 catch (exception& e) { 00067 cerr << "parse_date: " << e.what() << endl; 00068 return false; 00069 } 00070 return true; 00071 } |
|
Auxiliary function for parse_line().
Definition at line 111 of file logrecord.C. Referenced by parse_line().
00111 { 00112 /* The path part can be found between the first and second occurrences 00113 of `"' (double quote). An example is shown below. 00114 00115 "GET /ssl/kiesoos.cgi?rolnr=58769\&stjcode=5L10021 HTTP/1.0" 00116 00117 This will result in 00118 00119 _path = <"","ssl","kiesoos.cgi"> 00120 */ 00121 string::size_type path_start(line.find_first_of('\"')); 00122 if (path_start==string::npos) 00123 return false; 00124 /* We are not interested in the verb (e.g. GET), so we skip until after the first ' '. 00125 In the example, path_start would then point to. 00126 00127 /ssl/kiesoos.cgi?rolnr=58769\&stjcode=5L10021 HTTP/1.0" 00128 */ 00129 path_start = line.find_first_of(' ',path_start); 00130 if (path_start==string::npos) 00131 return false; 00132 ++path_start; // After ' '. 00133 /* We only want the path until the first '?', '#' or ' '. In the example 00134 this should result in. 00135 00136 path_string = /ssl/kiesoos.cgi 00137 */ 00138 00139 string::size_type path_end = line.find_first_of("#? ",path_start); 00140 if (path_end==string::npos) 00141 return false; 00142 00143 if (path_end==path_start) // cannot have an empty path 00144 return false; 00145 00146 string path_string(line,path_start,path_end - path_start); 00147 00148 // Decode to translate "%7E" back to '~' etc. 00149 www_decode(path_string); 00150 00151 /* Decode the parts of the path_string into a vector. The first component of the 00152 vector will be empty if the path_string starts with '/'. 00153 We also refuse to append empty strings to path_. This handles cases like 00154 "/a//b" which will result in <"","a","b">. 00155 00156 We use n0 to indicate the start position of a component and 00157 n1 to indicate the position just after the end of the component. 00158 */ 00159 string::size_type n0(path_string.find_first_not_of('/')); 00160 00161 if (n0!=0) // the path starts with '/' which we encode as a first "/" component. 
00162 path_.push_back(""); 00163 00164 if (n0==string::npos) // path_string must be "/" 00165 return true; 00166 00167 string::size_type n1(string::npos); 00168 00169 /* The variable n0 points to the start of a component. We make n1 point to 00170 the next '/', if any. The component then is the substring starting at 00171 n0 and ending before n1. 00172 */ 00173 while ((n1=path_string.find_first_of('/',n0))!=string::npos) { 00174 if (n1>n0) // only non-empty parts are stored. 00175 path_.push_back(path_string.substr(n0,n1-n0)); 00176 n0 = n1+1; 00177 } 00178 // If path_string does not end with '/', we must still add the final component. 00179 if (n0<path_string.size()) 00180 path_.push_back(path_string.substr(n0)); 00181 return true; 00182 } |
|
Auxiliary function for parse_line().
Definition at line 186 of file logrecord.C. Referenced by parse_line().
00186 { 00187 /* The domain informatin is easy to find: it's the first part of 00188 a line, followed by a ' ', as illustrated in the following example. 00189 00190 igwe.vub.ac.be - - [12/Feb/2000:13:18:49 +0100] ... 00191 00192 */ 00193 string::size_type domain_start(0); 00194 string::size_type domain_end(line.find_first_of(' ')); 00195 if (domain_end==string::npos) 00196 return false; 00197 string domain_string(line,domain_start,domain_end); 00198 00199 string::size_type n0(domain_string.find_first_not_of('.')); 00200 if (n0!=0) 00201 return false; 00202 string::size_type n1(string::npos); 00203 00204 while ((n1 = domain_string.find_first_of('.',n0)) != string::npos) { 00205 if (n1>n0) 00206 domain_.push_back(domain_string.substr(n0,n1-n0)); 00207 n0 = n1+1; 00208 } 00209 // If domain does not end with '.', we still need to add a final component. 00210 if (n0<domain_string.size()) 00211 domain_.push_back(domain_string.substr(n0)); 00212 00213 return true; 00214 } |
|
Definition at line 28 of file logrecord.h. |
|
Definition at line 29 of file logrecord.h. |
|
Definition at line 30 of file logrecord.h. |