Class: Log2COUNTER::Parser
- Inherits:
-
Object
- Object
- Log2COUNTER::Parser
- Defined in:
- lib/log2counter/parser.rb
Constant Summary collapse
- ABBR2MONTH =
Map month abbreviations to their two-digit number equivalent.
{}
- DEFAULT_MONTHS =
By default we will consider the current year.
(1..12).map { |month| '%d_%02d' % [year, month] }
- DEFAULT_STATS =
This is what we start with – all zero.
{ :sessions => 0, :searches => 0, :downloads => 0 }
- DEFAULT_REGEXP =
NOTE: the :id pattern should contain a capture group for the licensee ID.
{ :id => //, :login => //, :search => //, :download => // }
Instance Attribute Summary collapse
-
#constraint ⇒ Object
readonly
Returns the value of attribute constraint.
-
#licensees ⇒ Object
readonly
Returns the value of attribute licensees.
-
#licensees_by_id ⇒ Object
readonly
Returns the value of attribute licensees_by_id.
-
#licensees_by_ip ⇒ Object
readonly
Returns the value of attribute licensees_by_ip.
-
#log_file ⇒ Object
readonly
Returns the value of attribute log_file.
-
#months ⇒ Object
readonly
Returns the value of attribute months.
-
#regexp ⇒ Object
readonly
Returns the value of attribute regexp.
Class Method Summary collapse
Instance Method Summary collapse
-
#initialize(log_file, licensees, months = nil, regexp = nil) ⇒ Parser
constructor
A new instance of Parser.
-
#parse ⇒ Object
Now here’s the method you want to call.
Constructor Details
#initialize(log_file, licensees, months = nil, regexp = nil) ⇒ Parser
Returns a new instance of Parser.
83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/log2counter/parser.rb', line 83

# Builds a parser for +log_file+.
#
# log_file::  stored unchanged in @log_file.
# licensees:: hash of licensee settings; entries whose value has a falsy
#             :export are dropped before being stored in @licensees.
# months::    list of 'YYYY_MM' strings; DEFAULT_MONTHS is used when nil.
# regexp::    hash of patterns merged over DEFAULT_REGEXP; nil means
#             "no overrides".
#
# Raises ArgumentError if any month is not formatted as YYYY_MM.
def initialize(log_file, licensees, months = nil, regexp = nil)
  @log_file = log_file
  @months   = months || DEFAULT_MONTHS

  malformed = @months.any? { |entry| entry !~ /\A\d\d\d\d_\d\d\z/ }
  raise ArgumentError, "illegal format for month; must be YYYY_MM" if malformed

  overrides = regexp || {}
  @regexp   = DEFAULT_REGEXP.merge(overrides)

  # A single pattern that matches whenever any of the individual ones does.
  @constraint = Regexp.union(*@regexp.values)

  # Keep only licensees flagged for export.
  @licensees = licensees.reject { |_, settings| !settings[:export] }

  initialize_licensees
end
Instance Attribute Details
#constraint ⇒ Object (readonly)
Returns the value of attribute constraint.
81 82 83 |
# File 'lib/log2counter/parser.rb', line 81

# Read-only accessor: returns the value held in @constraint.
def constraint
  @constraint
end
#licensees ⇒ Object (readonly)
Returns the value of attribute licensees.
81 82 83 |
# File 'lib/log2counter/parser.rb', line 81

# Read-only accessor: returns the value held in @licensees.
def licensees
  @licensees
end
#licensees_by_id ⇒ Object (readonly)
Returns the value of attribute licensees_by_id.
81 82 83 |
# File 'lib/log2counter/parser.rb', line 81

# Read-only accessor: returns the value held in @licensees_by_id.
def licensees_by_id
  @licensees_by_id
end
#licensees_by_ip ⇒ Object (readonly)
Returns the value of attribute licensees_by_ip.
81 82 83 |
# File 'lib/log2counter/parser.rb', line 81

# Read-only accessor: returns the value held in @licensees_by_ip.
def licensees_by_ip
  @licensees_by_ip
end
#log_file ⇒ Object (readonly)
Returns the value of attribute log_file.
81 82 83 |
# File 'lib/log2counter/parser.rb', line 81

# Read-only accessor: returns the value held in @log_file.
def log_file
  @log_file
end
#months ⇒ Object (readonly)
Returns the value of attribute months.
81 82 83 |
# File 'lib/log2counter/parser.rb', line 81

# Read-only accessor: returns the value held in @months.
def months
  @months
end
#regexp ⇒ Object (readonly)
Returns the value of attribute regexp.
81 82 83 |
# File 'lib/log2counter/parser.rb', line 81

# Read-only accessor: returns the value held in @regexp.
def regexp
  @regexp
end
Class Method Details
.load(csv_file) ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/log2counter/parser.rb', line 65

# Reads statistics rows from +csv_file+ (handed to FasterCSV with
# :headers => true) and rebuilds the nested statistics hash:
#
#   stats[month][licensee][name][address] =
#     { :sessions => Integer, :searches => Integer, :downloads => Integer }
#
# Each row is expected to carry seven fields, in this order: month,
# licensee, name, address, sessions, searches, downloads. The three counts
# are converted with to_i. A later row for the same
# month/licensee/name/address replaces the earlier one.
def load(csv_file)
  stats = {}

  FasterCSV.new(csv_file, :headers => true).each { |row|
    month, licensee, name, address, sessions, searches, downloads = row.fields

    # Walk down the nesting, creating each level on first use.
    by_licensee = (stats[month] ||= {})
    by_name     = (by_licensee[licensee] ||= {})
    by_address  = (by_name[name] ||= {})

    by_address[address] = {
      :sessions  => sessions.to_i,
      :searches  => searches.to_i,
      :downloads => downloads.to_i
    }
  }

  stats
end
Instance Method Details
#parse ⇒ Object
Now here’s the method you want to call. Returns a hash:
stats = {
'2007_06' => {
'Somewhere, Inst.' => {
'12.34.56.78' => {
:sessions => 12,
:searches => 34,
:downloads => 56
},
...
},
...
},
...
}
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
# File 'lib/log2counter/parser.rb', line 114

# Parses +log_file+ and counts sessions, searches and downloads.
#
# Returns a nested hash:
#
#   stats[month][licensee][name][address] =
#     { :sessions => Integer, :searches => Integer, :downloads => Integer }
#
# Every month in +months+ and every licensee in +licensees+ appears in the
# result. Where no address with at least one session was seen, a single
# placeholder entry keyed by +nil+ holds a fresh all-zero copy of
# DEFAULT_STATS.
def parse
  # Cache resolved host names; fall back to the raw value if resolution fails.
  addr2addr = Hash.new { |hash, addr|
    hash[addr] = begin
      Resolv.getaddress(addr)
    rescue Resolv::ResolvError
      addr
    end
  }

  # Cache licensees looked up by address.
  addr2lcee = Hash.new { |hash, addr|
    hash[addr] = licensees_by_ip.get(addr)
  }

  # URI.decode was removed in Ruby 3.0. URI::DEFAULT_PARSER.unescape is used
  # where available; very old Rubies without it still use URI.decode.
  unescape = if defined?(URI::DEFAULT_PARSER)
    lambda { |str| URI::DEFAULT_PARSER.unescape(str) }
  else
    lambda { |str| URI.decode(str) }
  end

  # Our result hash
  stats = {}

  # Create a new LogParser and send our log file. Yields a hash per line.
  LogParser.new(:minimal, constraint).parse_io_stream(log_file) { |stat|
    path = stat['PATH_INFO']

    # Skip lines that don't have any useful information for us anyway.
    next unless path =~ constraint

    # Maybe we already captured the licensee ID? Must be read right after
    # the match above, before any other regexp operation overwrites it.
    id = $1

    m, y  = stat['DATETIME'][/\/(.*?):/, 1].split('/')  # Extract month and year
    month = [y, ABBR2MONTH[m]].join('_')                # Target format is 'YYYY_MM'

    # Skip lines that fall out of the range we're interested in.
    next unless months.include?(month)

    address = addr2addr[stat['REMOTE_ADDR']]

    # Prefer a match by address; otherwise fall back to the ID in the path.
    licensee = addr2lcee[address] || licensees_by_id[
      unescape.call(id || path[regexp[:id], 1] || '')
    ]

    # Couldn't find a matching licensee? Skip it!
    next unless licensee

    name     = licensee[:name]
    licensee = licensee[:licensee]

    (((stats[month] ||= {})[licensee] ||= {})[name] ||= {})[address] ||= DEFAULT_STATS.dup
    _address = stats[month][licensee][name][address]

    # Increment our counts, since that's what we're here for...
    _address[:sessions]  += 1 if path =~ regexp[:login]
    _address[:searches]  += 1 if path =~ regexp[:search]
    _address[:downloads] += 1 if path =~ regexp[:download]
  }

  # Now we need to fill in any months and licensees we didn't come across before.
  months.each { |month|
    stats[month] ||= {}

    licensees.each { |licensee, hash|
      stats[month][licensee] ||= {}
      addresses = stats[month][licensee][hash[:name]]

      if addresses
        # Drop entries with zero sessions.
        addresses.delete_if { |_, counts| counts[:sessions].zero? }
      end

      # Add a default "empty" entry for completeness' sake. Each placeholder
      # gets its own copy so that mutating one can neither corrupt
      # DEFAULT_STATS nor leak into the other placeholders.
      if addresses.nil? || addresses.empty?
        stats[month][licensee][hash[:name]] = { nil => DEFAULT_STATS.dup }
      end
    }
  }

  # That's it, return what we've got.
  stats
end