Class: HTTPLogAnalyzer::Importer
- Inherits:
-
Object
- Object
- HTTPLogAnalyzer::Importer
- Defined in:
- lib/http-log-analyzer/importer.rb
Constant Summary collapse
- StatusKeys =
{ 4 => :client_error_statuses, 5 => :server_error_statuses, }
Class Method Summary collapse
- .parse_list(text) ⇒ Object
Instance Method Summary collapse
- #ignore?(entry) ⇒ Boolean
-
#initialize(domain:, ignored_ips: nil, ignored_cities: nil, ignored_browsers: nil, ignored_referers: nil, ignored_paths: nil, period: nil) ⇒ Importer
constructor
A new instance of Importer.
- #page?(entry) ⇒ Boolean
- #parse_period(period) ⇒ Object
- #process_line(file, line_num, line) ⇒ Object
- #report ⇒ Object
Constructor Details
#initialize(domain:, ignored_ips: nil, ignored_cities: nil, ignored_browsers: nil, ignored_referers: nil, ignored_paths: nil, period: nil) ⇒ Importer
Returns a new instance of Importer.
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/http-log-analyzer/importer.rb', line 10
# Builds an importer and the collaborators it needs.
#
# Every ignored_* argument is used as a lookup table (`table[key]` is truthy
# when the key should be ignored); a nil argument is replaced by an empty Hash.
#
# @param domain [String] compared against referer hosts in #process_line
# @param ignored_ips [Hash, nil] keyed by source address
# @param ignored_cities [Hash, nil] keyed by source city
# @param ignored_browsers [Hash, nil] keyed by user-agent browser
# @param ignored_referers [Hash, nil] keyed by referer host
# @param ignored_paths [Hash, nil] keyed by request path
# @param period [String, nil] "START - END" text handed to #parse_period;
#   nil means no period filter
def initialize(
  domain:,
  ignored_ips: nil,
  ignored_cities: nil,
  ignored_browsers: nil,
  ignored_referers: nil,
  ignored_paths: nil,
  period: nil
)
  @domain = domain
  @ignored_ips = ignored_ips || {}
  @ignored_cities = ignored_cities || {}
  @ignored_browsers = ignored_browsers || {}
  @ignored_referers = ignored_referers || {}
  @ignored_paths = ignored_paths || {}
  @period = (parse_period(period) if period)
  @log_parser = HttpLogParser.new
  # NOTE(review): these are process-wide globals, presumably read by code
  # outside this method; they are kept as globals so those readers still work.
  $user_agent_parser = UserAgentParser::Parser.new
  # NOTE(review): the listing showed `File.(...)`, which would call the
  # non-existent File.call; reconstructed as File.expand_path — confirm.
  $geo_ip = GeoIP.new(File.expand_path('/usr/local/var/GeoIP/GeoLiteCity.dat'))
  @stats = Stats.new
end
Class Method Details
.parse_list(text) ⇒ Object
82 83 84 85 86 |
# File 'lib/http-log-analyzer/importer.rb', line 82
# Turns newline-separated text into a lookup table.
#
# For each line, everything from the first `#` onward is dropped as a comment
# and surrounding whitespace is stripped; lines that end up empty are skipped.
#
# @param text [String, nil] list text, one item per line; nil is treated as
#   an empty list
# @return [Hash{String => true}] each remaining item mapped to true
def self.parse_list(text)
  text.to_s.each_line.each_with_object({}) do |raw, table|
    item = raw.sub(/#.*/, '').strip
    table[item] = true unless item.empty?
  end
end
Instance Method Details
#ignore?(entry) ⇒ Boolean
64 65 66 67 68 69 70 |
# File 'lib/http-log-analyzer/importer.rb', line 64
# Tells whether an entry matches any of the ignore tables.
#
# Lookups run in this order and stop at the first truthy hit: source
# address, browser, source city, referer host (only when the referer has a
# URI), request path.
#
# @param entry [Object] log entry responding to source, user_agent, referer
#   and request
# @return [Object, nil] the first truthy table value, otherwise the (falsy)
#   result of the path lookup
def ignore?(entry)
  hit = @ignored_ips[entry.source.address]
  hit ||= @ignored_browsers[entry.user_agent.browser]
  hit ||= @ignored_cities[entry.source.city]
  hit ||= begin
    referer_uri = entry.referer&.uri
    referer_uri && @ignored_referers[referer_uri.host]
  end
  hit || @ignored_paths[entry.request.uri.path]
end
#page?(entry) ⇒ Boolean
72 73 74 75 76 |
# File 'lib/http-log-analyzer/importer.rb', line 72
# Tells whether an entry counts as a page request.
#
# @param entry [Object] log entry whose request exposes uri.path and
#   mime_types
# @return [Boolean] true when the path is non-empty, does not embed another
#   http(s) URL, and the request has no MIME types
def page?(entry)
  request = entry.request
  path = request.uri.path
  return false if path =~ %r{/.*?https?://} # bad URL construction (from bot)
  return false if path.empty?

  request.mime_types.empty? # pages have empty MIME types
end
#parse_period(period) ⇒ Object
32 33 34 |
# File 'lib/http-log-analyzer/importer.rb', line 32
# Converts "START - END" text into a Range of DateTime values.
#
# The text is split on the first ' - ' only; each half is parsed with
# DateTime.parse.
#
# @param period [String] two date/time strings separated by ' - '
# @return [Range] inclusive range from the parsed start to the parsed end
# @raise [ArgumentError] when the separator is missing or either half cannot
#   be parsed as a date
def parse_period(period)
  bounds = period.split(' - ', 2)
  unless bounds.size == 2
    # Without this check Range.new fails with an unhelpful arity error.
    raise ArgumentError, "Invalid period #{period.inspect}: expected 'START - END'"
  end

  Range.new(*bounds.map { |text| DateTime.parse(text) })
end
#process_line(file, line_num, line) ⇒ Object
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# File 'lib/http-log-analyzer/importer.rb', line 36
# Parses one raw log line and records it in the statistics when it is an
# in-period, non-ignored page request.
#
# @param file [Object] not used by this method
# @param line_num [Object] not used by this method
# @param line [String] raw log line handed to the log parser
# @return [Object, nil] result of the last statement executed; nil when the
#   entry is outside the period or is not a page
# @raise [ParseError] when the log parser raises for this line
def process_line(file, line_num, line)
  begin
    data = @log_parser.parse_line(line)
  rescue StandardError
    # The parser's exception stays reachable through ParseError#cause.
    raise ParseError, "Can't parse line: #{line}"
  end
  entry = Entry.new(data)
  # NOTE(review): the listing showed `entry.)` with the method name lost;
  # reconstructed as `timestamp` from the comment below — confirm against Entry.
  if @period && !@period.cover?(entry.timestamp)
    # ignore timestamp out of specified period
  elsif ignore?(entry)
    # Once any ignore rule matches, every later entry from this address is
    # ignored as well.
    @ignored_ips[entry.source.address] = true
  elsif page?(entry)
    if (statuses_key = StatusKeys[entry.status.class])
      @stats.add(statuses_key, "#{entry.status.code}: #{entry.request.uri.path}")
    end
    @stats.add(:source_country, entry.source.country)
    @stats.add(:source_region, entry.source.region)
    @stats.add(:source_city, entry.source.city)
    @stats.add(:wv_source_city, entry.source.city) if entry.source.region == 'West Virginia, United States'
    @stats.add(:pages, entry.request.uri.path)
    referer = entry.referer
    # A missing referer used to raise NoMethodError here (nil.uri); it is now
    # skipped. Referers on our own domain are still not counted.
    if referer && referer.uri&.host != @domain
      @stats.add(:via, referer.uri)
    end
    @stats.add(:searches, referer.query) if referer&.query
    @stats.add(:browsers, entry.user_agent.browser)
    @stats.add(:systems, entry.user_agent.system)
    @stats.add(:dates, entry.calendar_week)
  end
end
#report ⇒ Object
78 79 80 |
# File 'lib/http-log-analyzer/importer.rb', line 78
# Delegates to the statistics collector held in @stats.
#
# @return [Object] whatever @stats.report returns
def report
  collector = @stats
  collector.report
end