httpstats-final

00010                                         {
00011 /* Parse one log line into date_, path_ and domain_. First clear the
00012    vector<string> members, because parsing appends to path_ and domain_.
00013 */
00014 path_.clear(); 
00015 domain_.clear();
00016 /* A parse succeeds only if date, path and domain can all be parsed;
00017    '&&' short-circuits, so parsing stops at the first component that fails.
00018    TODO: parse only what the configuration actually requires.
00019 */
00020 return parse_date(line) && parse_path(line) && parse_domain(line);
00021 }

00016 { return date_; } // Accessor body: hands back the date_ member.

00017 { return path_; } // Accessor body: hands back the path_ member.

00018 { return domain_; } // Accessor body: hands back the domain_ member.

00025                                         {
00026 /* Extract the timestamp of a log line into date_, truncated to the hour.
00027    Select date part: just after first '[' up to first following ' '
00028    e.g.
00029         spider1.tiscalinet.be - - [02/Nov/2000:10:20:36 +0100] ..
00030 
00031    will select
00032 
00033         02/Nov/2000:10:20:36
00034 */
00035 string::size_type date_start(line.find_first_of('[')); // NOTE(review): result is not compared against string::npos
00036 string::size_type date_end(line.find_first_of(' ',date_start)); // NOTE(review): result is not compared against string::npos
00037 string            date_string(line,date_start+1,date_end - date_start-1);
00038 
00039 /* Make date_string acceptable to the Date constructor used below.
00040 
00041    1. replace '/' by '-'. In the example, this will yield
00042 
00043         02-Nov-2000:10:20:36
00044 */
00045 for (string::size_type i=0; i<date_string.size(); ++i)
00046   if (date_string[i]=='/')
00047     date_string[i] = '-';
00048 /* 2. Replace first ':' by space. In the example, this will yield
00049         02-Nov-2000 10:20:36
00050    NOTE(review): when date_string holds no ':', find_first_of() returns
00051    npos and replace() throws std::out_of_range, outside the try below.
00052 */
00053 date_string.replace(date_string.find_first_of(':'),1," ");
00054 
00055 /* Now construct a Date from date_string. Any exception derived from
00056    `exception' raised in this block (e.g. presumably when the date cannot
00057    be parsed) is reported on cerr and turned into a false return.
00058 */
00059 try {
00060   Date  date(date_string);
00061   date_ = date;
00062   // We are only interested in hours, so minutes and seconds are zeroed.
00063   date_.minutes(0);
00064   date_.seconds(0);
00065   }
00066 catch (exception& e) {
00067   cerr << "parse_date: " << e.what() << endl;
00068   return false;
00069   }
00070 return true;
00071 }

00111                                         {
00112 /* Split the request path of a log line into components appended to path_.
00113    The path part can be found between the first and second occurrences
00114    of `"' (double quote). An example is shown below.
00115     "GET /ssl/kiesoos.cgi?rolnr=58769\&stjcode=5L10021 HTTP/1.0"
00116 
00117    This will result in
00118 
00119      path_ = <"","ssl","kiesoos.cgi">
00120 */
00121 string::size_type path_start(line.find_first_of('\"'));
00122 if (path_start==string::npos)
00123   return false;
00124 /* We are not interested in the verb (e.g. GET), so we skip until after the first ' '.
00125    In the example, path_start would then point to
00126 
00127     /ssl/kiesoos.cgi?rolnr=58769\&stjcode=5L10021 HTTP/1.0"
00128 */
00129 path_start = line.find_first_of(' ',path_start);
00130 if (path_start==string::npos)
00131   return false;
00132 ++path_start; // After ' '.
00133 /*  We only want the path until the first '?', '#' or ' '. In the example
00134     this should result in
00135 
00136     path_string = /ssl/kiesoos.cgi
00137 */
00138 
00139 string::size_type path_end = line.find_first_of("#? ",path_start);
00140 if (path_end==string::npos)
00141   return false;
00142 
00143 if (path_end==path_start) // cannot have an empty path
00144   return false;
00145 
00146 string path_string(line,path_start,path_end - path_start);
00147 
00148 // Decode in place, e.g. to translate "%7E" back to '~' etc.
00149 www_decode(path_string);
00150 
00151 /* Split path_string at '/' into the path_ vector. The first component of the
00152    vector will be the empty string if the path_string starts with '/'.
00153    Apart from that, empty strings are never appended to path_. This handles
00154    cases like "/a//b" which will result in <"","a","b">.
00155 
00156   We use n0 to indicate the start position of a component and
00157   n1 to indicate the position just after the end of the component.
00158 */
00159 string::size_type n0(path_string.find_first_not_of('/'));
00160 
00161 if (n0!=0) // path_string starts with '/' (or has no non-'/' character at all): record a leading "" component.
00162   path_.push_back("");
00163 
00164 if (n0==string::npos) // path_string holds no character other than '/', e.g. "/"
00165   return true;
00166 
00167 string::size_type n1(string::npos);
00168 
00169 /* The variable n0 points to the start of a component. We make n1 point to
00170    the next '/', if any. The component then is the substring starting at
00171    n0 and ending before n1.
00172 */
00173 while ((n1=path_string.find_first_of('/',n0))!=string::npos) {
00174   if (n1>n0) // only non-empty parts are stored.
00175     path_.push_back(path_string.substr(n0,n1-n0));
00176   n0 = n1+1;
00177   }
00178 // If path_string does not end with '/', we must still add the final component.
00179 if (n0<path_string.size())
00180   path_.push_back(path_string.substr(n0));
00181 return true;
00182 }

00186                                           {
00187 /* Split the host name of a log line into '.'-separated labels appended
00188    to domain_. The domain information is the first part of a line,
00189    followed by a ' ', as illustrated in the following example.
00190         igwe.vub.ac.be - - [12/Feb/2000:13:18:49 +0100] ...
00191    This will result in domain_ = <"igwe","vub","ac","be">
00192 */
00193 string::size_type domain_start(0);
00194 string::size_type domain_end(line.find_first_of(' '));
00195 if (domain_end==string::npos)
00196   return false;
00197 string domain_string(line,domain_start,domain_end); // third argument is a length; it equals the end position because domain_start is 0
00198 
00199 string::size_type n0(domain_string.find_first_not_of('.'));
00200 if (n0!=0) // reject a domain that is empty or begins with '.'
00201   return false;
00202 string::size_type n1(string::npos);
00203 
00204 while ((n1 = domain_string.find_first_of('.',n0)) != string::npos) {
00205   if (n1>n0) // only non-empty labels are stored, so ".." adds nothing
00206     domain_.push_back(domain_string.substr(n0,n1-n0));
00207   n0 = n1+1;
00208   }
00209 // If domain does not end with '.', we still need to add a final component.
00210 if (n0<domain_string.size()) 
00211   domain_.push_back(domain_string.substr(n0));
00212   
00213 return true;
00214 }

Public Methods
	LogRecord ()
bool	parse_line (const string &line)
	Fill in the date_, domain_ and path_ components from one line of the log file.
const Date&	date () const
const vector<string>&	path () const
const vector<string>&	domain () const
Private Methods
bool	parse_date (const string &line)
	Auxiliary function for parse_line().
bool	parse_path (const string &line)
	Auxiliary function for parse_line().
bool	parse_domain (const string &line)
	Auxiliary function for parse_line().
Private Attributes
Date	date_
vector<string>	path_
vector<string>	domain_

LogRecord Class Reference

Public Methods

Private Methods

Private Attributes

Detailed Description

Constructor & Destructor Documentation

Member Function Documentation

Member Data Documentation