Class: HTTPLogAnalyzer::Importer

Inherits:
Object
  • Object
show all
Defined in:
lib/http-log-analyzer/importer.rb

Constant Summary collapse

# Maps the value of entry.status.class (the lookup done in #process_line) to
# the stats category under which that request is recorded.
# NOTE(review): 4 and 5 presumably stand for the 4xx / 5xx HTTP status
# classes -- confirm against the status object's definition.
# Frozen so the shared lookup table cannot be modified at runtime.
StatusKeys = {
  4 => :client_error_statuses,
  5 => :server_error_statuses,
}.freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(domain:, ignored_ips: nil, ignored_cities: nil, ignored_browsers: nil, ignored_referers: nil, ignored_paths: nil, period: nil) ⇒ Importer

Returns a new instance of Importer.



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/http-log-analyzer/importer.rb', line 10

# Sets up an importer for one site.
#
# Every ignored_* argument is kept as a lookup table that #ignore? indexes
# with []; passing nil (the default) yields an empty Hash.
#
# @param domain stored as @domain and compared with referer hosts in
#   #process_line
# @param ignored_ips lookup indexed by entry.source.address
# @param ignored_cities lookup indexed by entry.source.city
# @param ignored_browsers lookup indexed by entry.user_agent.browser
# @param ignored_referers lookup indexed by the referer URI host
# @param ignored_paths lookup indexed by the request URI path
# @param period optional period text handed to #parse_period; nil disables
#   period filtering
def initialize(
  domain:,
  ignored_ips: nil,
  ignored_cities: nil,
  ignored_browsers: nil,
  ignored_referers: nil,
  ignored_paths: nil,
  period: nil
)
  @domain = domain

  # Lookup tables: fall back to an empty Hash when none is supplied.
  @ignored_paths = ignored_paths || {}
  @ignored_referers = ignored_referers || {}
  @ignored_browsers = ignored_browsers || {}
  @ignored_cities = ignored_cities || {}
  @ignored_ips = ignored_ips || {}

  # Left as nil when no period was requested.
  @period = (parse_period(period) if period)

  @log_parser = HttpLogParser.new

  # Global variables, visible process-wide.
  # NOTE(review): presumably read while building entries elsewhere -- confirm.
  $user_agent_parser = UserAgentParser::Parser.new
  $geo_ip = GeoIP.new(File.expand_path('/usr/local/var/GeoIP/GeoLiteCity.dat'))

  @stats = Stats.new
end

Class Method Details

.parse_list(text) ⇒ Object



82
83
84
85
86
# File 'lib/http-log-analyzer/importer.rb', line 82

# Turns a newline-separated list into a lookup Hash.
#
# On each line, everything from the first '#' onwards is discarded and the
# remainder is stripped of surrounding whitespace; lines that end up empty
# are skipped. Every remaining item becomes a key whose value is true.
#
# @param text [String] the raw list, one item per line
# @return [Hash{String => true}] the items, in order of first appearance
def self.parse_list(text)
  text.each_line.with_object({}) do |raw_line, items|
    item = raw_line.sub(/#.*/, '').strip
    items[item] = true unless item.empty?
  end
end

Instance Method Details

#ignore?(entry) ⇒ Boolean

Returns:

  • (Boolean)


64
65
66
67
68
69
70
# File 'lib/http-log-analyzer/importer.rb', line 64

# Tells whether an entry matches any of the ignore lookups.
#
# The lookups are consulted in a fixed order -- source address, browser,
# source city, referer host (only when the entry has a referer URI), request
# path -- and the first truthy lookup value is returned. When nothing
# matches, the result of the request-path lookup is returned.
#
# @param entry the parsed log entry to test
# @return [Boolean] truthy when the entry should be ignored
def ignore?(entry)
  by_address = @ignored_ips[entry.source.address]
  return by_address if by_address

  by_browser = @ignored_browsers[entry.user_agent.browser]
  return by_browser if by_browser

  by_city = @ignored_cities[entry.source.city]
  return by_city if by_city

  # The referer lookup only applies when a referer URI is present.
  referer_uri = entry.referer&.uri
  by_referer = referer_uri && @ignored_referers[referer_uri.host]
  return by_referer if by_referer

  @ignored_paths[entry.request.uri.path]
end

#page?(entry) ⇒ Boolean

Returns:

  • (Boolean)


72
73
74
75
76
# File 'lib/http-log-analyzer/importer.rb', line 72

# Tells whether an entry is counted as a page view.
#
# An entry qualifies when its request path does not embed another
# "http://" / "https://" URL after a slash (bad URL construction, from bots),
# the path is not empty, and the request's MIME type list is empty (pages
# have empty MIME types).
#
# @param entry the parsed log entry to test
# @return [Boolean]
def page?(entry)
  request = entry.request
  path = request.uri.path

  return false if path =~ %r{/.*?https?://} # bad URL construction (from bot)
  return false if path.empty?

  request.mime_types.empty? # pages have empty MIME types
end

#parse_period(period) ⇒ Object



32
33
34
# File 'lib/http-log-analyzer/importer.rb', line 32

# Parses a period written as "START - END" into a Range of DateTime values.
#
# The text is split on the first ' - ' and each side is handed to
# DateTime.parse.
#
# @param period [String] the period text, e.g. "2020-01-01 - 2020-01-31"
# @return [Range] range from the parsed start to the parsed end
# @raise [ArgumentError] when the ' - ' separator is missing, or when
#   DateTime.parse rejects either side
def parse_period(period)
  first, last = period.split(' - ', 2)
  # Without this check a missing separator reached Range.new with too few
  # arguments and failed with an unhelpful "wrong number of arguments" error.
  unless first && last
    raise ArgumentError, "Invalid period #{period.inspect}: expected 'START - END'"
  end

  Range.new(DateTime.parse(first), DateTime.parse(last))
end

#process_line(file, line_num, line) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/http-log-analyzer/importer.rb', line 36

# Parses one raw log line and records it in @stats when it is a page view.
#
# The line is parsed and wrapped in an Entry. It is then dropped when a
# period is set and does not cover its timestamp, or when #ignore? matches
# (in which case its source address is added to @ignored_ips). Otherwise, if
# #page? accepts it, the entry's status, source, path, referer, user agent
# and calendar week are added to @stats.
#
# @param file not used by this method
# @param line_num not used by this method
# @param line [String] the raw log line
# @raise [ParseError] when the log parser raises a StandardError for the line
def process_line(file, line_num, line)
  begin
    data = @log_parser.parse_line(line)
  rescue StandardError
    # The parser's exception stays reachable through ParseError#cause.
    raise ParseError, "Can't parse line: #{line}"
  end
  entry = Entry.new(data)
  if @period && !@period.cover?(entry.timestamp)
    # ignore timestamp out of specified period
  elsif ignore?(entry)
    # #ignore? consults @ignored_ips first, so later lines from this address
    # are ignored as well.
    @ignored_ips[entry.source.address] = true
  elsif page?(entry)
    if (statuses_key = StatusKeys[entry.status.class])
      @stats.add(statuses_key, "#{entry.status.code}: #{entry.request.uri.path}")
    end
    @stats.add(:source_country, entry.source.country)
    @stats.add(:source_region, entry.source.region)
    @stats.add(:source_city, entry.source.city)
    @stats.add(:wv_source_city, entry.source.city) if entry.source.region == 'West Virginia, United States'
    @stats.add(:pages, entry.request.uri.path)
    referer = entry.referer
    # An entry without a referer has nothing to record as :via or :searches.
    # Previously referer.uri was called on nil here and raised NoMethodError.
    if referer
      @stats.add(:via, referer.uri) unless referer.uri&.host == @domain
      @stats.add(:searches, referer.query) if referer.query
    end
    @stats.add(:browsers, entry.user_agent.browser)
    @stats.add(:systems, entry.user_agent.system)
    @stats.add(:dates, entry.calendar_week)
  end
end

#report ⇒ Object



78
79
80
# File 'lib/http-log-analyzer/importer.rb', line 78

# Delegates to @stats.report and returns whatever it returns.
def report
  @stats.report
end