Class: Log2COUNTER::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/log2counter/parser.rb

Constant Summary collapse

ABBR2MONTH =

Map month abbreviations to their two-digit number equivalent.

{}
DEFAULT_MONTHS =

By default we will consider the current year.

(1..12).map { |month| '%d_%02d' % [year, month] }
DEFAULT_STATS =

This is what we start with – all zero.

{
  :sessions  => 0,
  :searches  => 0,
  :downloads => 0
}
DEFAULT_REGEXP =

NOTE: :id should contain capture group for ID

{
  :id       => //,
  :login    => //,
  :search   => //,
  :download => //
}

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(log_file, licensees, months = nil, regexp = nil) ⇒ Parser

Returns a new instance of Parser.

Raises:

  • (ArgumentError)


83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/log2counter/parser.rb', line 83

# Sets up a parser for +log_file+.
#
# log_file::  stored as-is; #parse later hands it to the log parser.
# licensees:: hash of licensee => settings; only entries whose settings
#             carry a truthy <tt>:export</tt> value are kept.
# months::    list of 'YYYY_MM' strings to report on (default: DEFAULT_MONTHS).
# regexp::    hash of patterns that override the entries of DEFAULT_REGEXP.
#
# Raises ArgumentError if any month does not look like YYYY_MM.
def initialize(log_file, licensees, months = nil, regexp = nil)
  @log_file = log_file

  @months = months || DEFAULT_MONTHS
  malformed = @months.any? { |month| month !~ /\A\d\d\d\d_\d\d\z/ }
  raise ArgumentError, "illegal format for month; must be YYYY_MM" if malformed

  overrides = regexp || {}
  @regexp = DEFAULT_REGEXP.merge(overrides)

  # One pattern that matches whenever any of the individual patterns does.
  @constraint = Regexp.union(*@regexp.values)

  # Drop every licensee that is not flagged for export.
  @licensees = licensees.reject { |_, settings| !settings[:export] }
  initialize_licensees
end

Instance Attribute Details

#constraint ⇒ Object (readonly)

Returns the value of attribute constraint.



81
82
83
# File 'lib/log2counter/parser.rb', line 81

# Reader for <tt>@constraint</tt>, which the constructor sets to the union
# of all configured patterns.
def constraint; @constraint; end

#licensees ⇒ Object (readonly)

Returns the value of attribute licensees.



81
82
83
# File 'lib/log2counter/parser.rb', line 81

# Reader for <tt>@licensees</tt>: the licensees the constructor kept, i.e.
# those whose settings have a truthy <tt>:export</tt> value.
def licensees; @licensees; end

#licensees_by_id ⇒ Object (readonly)

Returns the value of attribute licensees_by_id.



81
82
83
# File 'lib/log2counter/parser.rb', line 81

# Reader for <tt>@licensees_by_id</tt>; #parse indexes it with a decoded
# licensee ID.
# NOTE(review): the assignment is not visible here -- presumably done by
# initialize_licensees; confirm.
def licensees_by_id; @licensees_by_id; end

#licensees_by_ip ⇒ Object (readonly)

Returns the value of attribute licensees_by_ip.



81
82
83
# File 'lib/log2counter/parser.rb', line 81

# Reader for <tt>@licensees_by_ip</tt>; #parse calls +get+ on it with a
# resolved address.
# NOTE(review): the assignment is not visible here -- presumably done by
# initialize_licensees; confirm.
def licensees_by_ip; @licensees_by_ip; end

#log_file ⇒ Object (readonly)

Returns the value of attribute log_file.



81
82
83
# File 'lib/log2counter/parser.rb', line 81

# Reader for <tt>@log_file</tt>: the log source exactly as it was passed to
# the constructor.
def log_file; @log_file; end

#months ⇒ Object (readonly)

Returns the value of attribute months.



81
82
83
# File 'lib/log2counter/parser.rb', line 81

# Reader for <tt>@months</tt>: the 'YYYY_MM' strings given to the
# constructor, or DEFAULT_MONTHS when none were given.
def months; @months; end

#regexp ⇒ Object (readonly)

Returns the value of attribute regexp.



81
82
83
# File 'lib/log2counter/parser.rb', line 81

# Reader for <tt>@regexp</tt>: DEFAULT_REGEXP merged with the patterns given
# to the constructor.
def regexp; @regexp; end

Class Method Details

.load(csv_file) ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/log2counter/parser.rb', line 65

# Rebuilds a statistics hash from CSV data.
#
# +csv_file+ is handed straight to FasterCSV.new with headers enabled
# (presumably a String or IO holding the CSV data -- confirm against
# FasterCSV). The fields of each row are read positionally as month,
# licensee, name, address, sessions, searches and downloads.
#
# Returns a nested hash:
#
#   { month => { licensee => { name => { address => {
#     :sessions => Integer, :searches => Integer, :downloads => Integer
#   } } } } }
#
# A later row with the same month/licensee/name/address replaces an earlier
# one.
def load(csv_file)
  stats = {}

  FasterCSV.new(csv_file, :headers => true).each { |row|
    month, licensee, name, address, sessions, searches, downloads = row.fields

    # Walk down the nesting, creating each missing level on the way.
    by_licensee = (stats[month] ||= {})
    by_name     = (by_licensee[licensee] ||= {})
    by_address  = (by_name[name] ||= {})

    by_address[address] = {
      :sessions  => sessions.to_i,
      :searches  => searches.to_i,
      :downloads => downloads.to_i
    }
  }

  stats
end

Instance Method Details

#parse ⇒ Object

Now here’s the method you want to call. Returns a hash:

stats = {
  '2007_06' => {
    'Licensee' => {
      'Somewhere, Inst.' => {
        '12.34.56.78' => {
          :sessions  => 12,
          :searches  => 34,
          :downloads => 56
        },
        ...
      },
      ...
    },
    ...
  },
  ...
}


114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/log2counter/parser.rb', line 114

# Parses the log file and tallies sessions, searches and downloads.
#
# Returns a nested hash keyed by month ('YYYY_MM'), licensee, licensee name
# and address, whose leaves are counter hashes:
#
#   stats[month][licensee][name][address]
#   # => { :sessions => 12, :searches => 34, :downloads => 56 }
#
# Every combination of requested month and known licensee is present in the
# result. Combinations without any counted session get a single placeholder
# entry stored under the +nil+ address with all counters at zero. Each
# placeholder is its own copy of DEFAULT_STATS, so callers may modify the
# result without affecting the constant or other entries.
def parse
  # Cache resolved host names; keep the raw value when resolution fails.
  addr2addr = Hash.new { |hash, addr|
    hash[addr] = begin
      Resolv.getaddress(addr)
    rescue Resolv::ResolvError
      addr
    end
  }

  # Cache licensees.
  addr2lcee = Hash.new { |hash, addr|
    hash[addr] = licensees_by_ip.get(addr)
  }

  # Our result hash
  stats = {}

  # Create a new LogParser and send our log file. Yields a hash per line.
  LogParser.new(:minimal, constraint).parse_io_stream(log_file) { |stat|
    path = stat['PATH_INFO']

    # Skip lines that don't have any useful information for us anyway.
    next unless path =~ constraint

    # Maybe we already captured the licensee ID? (see DEFAULT_REGEXP)
    # NB: must be read right after the match above, before any other match.
    id = $1

    m, y  = stat['DATETIME'][/\/(.*?):/, 1].split('/')  # Extract month and year
    month = [y, ABBR2MONTH[m]].join('_')                # Target format is 'YYYY_MM'

    # Skip lines that fall out of the range we're interested in.
    next unless months.include?(month)

    # Look the licensee up by address first; fall back to the ID in the path.
    address  = addr2addr[stat['REMOTE_ADDR']]
    licensee = addr2lcee[address] || licensees_by_id[
      URI.decode(id || path[regexp[:id], 1] || '')
    ]

    # Couldn't find a matching licensee? Skip it!
    next unless licensee

    name     = licensee[:name]
    licensee = licensee[:licensee]

    # Fetch the counters for this address, creating missing levels as we go.
    counts = (((stats[month] ||= {})[licensee] ||= {})[name] ||= {})[address] ||= DEFAULT_STATS.dup

    # Increment our counts, since that's what we're here for...
    counts[:sessions]  += 1 if path =~ regexp[:login]
    counts[:searches]  += 1 if path =~ regexp[:search]
    counts[:downloads] += 1 if path =~ regexp[:download]
  }

  # Now we need to fill in any months and licensees we didn't come across before.
  months.each { |month|
    by_licensee = (stats[month] ||= {})

    licensees.each { |licensee, hash|
      by_name   = (by_licensee[licensee] ||= {})
      addresses = by_name[hash[:name]]

      # Drop entries with zero sessions -- how come they occur, anyway?
      addresses.delete_if { |_, stat| stat[:sessions].zero? } if addresses

      # Add a default "empty" entry for completeness' sake. Use a copy so the
      # placeholder never aliases the DEFAULT_STATS constant.
      if addresses.nil? || addresses.empty?
        by_name[hash[:name]] = { nil => DEFAULT_STATS.dup }
      end
    }
  }

  # That's it, return what we've got.
  stats
end