Module: LoadingData

Defined in:
lib/log_analysis/loading_data.rb

Defined Under Namespace

Classes: Error

Constant Summary collapse

REGEX_KEYS =

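Matches the field labels (`time:`, ` host:`, ` status:`, …) that `to_record` uses to split a labelled log line into label/value tokens. Every label except `time:` must be preceded by a single space.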

/(time:| host:| status:| size:| request_length:| req:| method:| uri:| referer:| ua:| reqtime:| runtime:| apptime:| cache:| vhost:| server:| user:| forwardedfor:| forwardedproto:)/.freeze
# Matches a single access-log line. Named groups, in order: host, identity,
# user, time (the text inside [...]), method, path, code, size, and - only
# when the optional tail is present - referer, agent and http_x_forwarded_for.
REGEX_NGINX =
/\A^(?<host>\S*) (?<identity>\S*) (?<user>\S*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?:\s+\S*)?)?" (?<code>\S*) (?<size>\S*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)"(?:\s+(?<http_x_forwarded_for>\S+))?)?$/.freeze
# Matches an access-log line with a dotted-quad host, a bracketed timestamp,
# a request line, two numeric fields and a quoted trailing field.
# As written it only accepts the methods GET/POST/PUT/DELETE, the protocol
# HTTP/1.1 and a referer that is literally "-". The second group, (.{0}),
# always captures the empty string.
REGEX_APACHE =
%r{(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - (.{0})- \[([^\]]+?)\] "(GET|POST|PUT|DELETE) ([^\s]+?) (HTTP\/1\.1)" (\d+) (\d+) "-" "(.*)"}.freeze
# Maps a log type name to the name of the converter method that `input`
# dispatches to via `send`.
# NOTE(review): 'to_records' and 'convert_apache_logs' do not match any method
# shown on this page (the visible one is `to_record`) - confirm these targets
# exist, otherwise those types raise NoMethodError.
CONVERT_RECORD =
{ 'nginx' => 'convert_nginx_logs', 'apache' => 'convert_apache_logs', 'default' => 'to_records' }.freeze

Class Method Summary collapse

Class Method Details

.convert_nginx_logs(log) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/log_analysis/loading_data.rb', line 38

# Converts one access-log line into a hash of record attributes.
#
# The line is matched against REGEX_NGINX and the fields are read from the
# match's captures by position. Fields that are present but empty (for example
# a `""` referer) keep their slot instead of shifting later fields forward,
# and optional fields missing from the line are nil.
#
# @param log [String, nil] a single log line
# @return [Hash{String => Object}, false] the attributes keyed by 'host',
#   'user', 'time', 'method', 'uri', 'status', 'size', 'referer', 'ua' and
#   'forwarded'; false when the line does not match REGEX_NGINX (or is nil)
def self.convert_nginx_logs(log)
  match = REGEX_NGINX.match(log)
  return false unless match

  # Capture order: host, identity, user, time, method, path, code, size,
  # referer, agent, http_x_forwarded_for. save_user reads indexes 0 and 9.
  fields = match.captures

  {
    'host'      => fields[0],
    'user'      => save_user(fields),
    'time'      => fields[3],
    'method'    => fields[4],
    'uri'       => fields[5],
    'status'    => fields[6],
    'size'      => fields[7],
    'referer'   => fields[8],
    'ua'        => fields[9],
    'forwarded' => fields[10]
  }
end

.input(file_path, type) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/log_analysis/loading_data.rb', line 15

# Reads the file at +file_path+ and converts every line into a Record.
#
# Resets the @users cache, dispatches each line to the converter named in
# CONVERT_RECORD for +type+ ('nginx' when +type+ is nil), and collects a
# Record for every line whose converter returned a truthy value. While
# running it clears the terminal and prints a percentage for each line.
#
# @param file_path [String] path of the log file to read
# @param type [String, nil] key into CONVERT_RECORD; nil selects 'nginx'
# @return [Array<Record>] the records built from the convertible lines
def self.input(file_path, type)
  @users    = []
  lines     = File.readlines(file_path)
  total     = lines.size
  converter = CONVERT_RECORD[type.nil? ? 'nginx' : type]

  lines.each_with_object([]).with_index do |(line, records), index|
    # Non-bang chomp on purpose: chomp! returns nil when there is nothing to
    # strip (e.g. a last line without a trailing newline), which used to hand
    # nil to the converter. Lines are only normalised for the default type.
    prepared = type.nil? ? line.tr("\t", ' ').chomp : line
    params   = send(converter, prepared)
    record   = Record.new(params) if params

    system('clear')
    puts "#{((index.to_f / total) * 100).round}/100"
    records.push(record) if record
  end
end

.save_user(log) ⇒ Object



63
64
65
66
67
68
69
# File 'lib/log_analysis/loading_data.rb', line 63

# Returns the cached UserIdentity for the host/user-agent pair found at
# indexes 0 and 9 of +log+, creating and caching a new one on first sight.
#
# @param log [Array] parsed log fields; index 0 is the host, index 9 the
#   user-agent string
# @return [UserIdentity] the matching entry of @users
def self.save_user(log)
  known = @users.find do |identity|
    identity.host == log[0] && identity.user_agent.to_s == log[9]
  end
  return known unless known.nil?

  identity = UserIdentity.new(host: IPAddr.new(log[0]), user_agent: UserAgent.parse(log[9]))
  @users.push(identity)
  @users.last
end

.to_json(pair) ⇒ Object



59
60
61
# File 'lib/log_analysis/loading_data.rb', line 59

# Builds a one-entry hash from a label/value pair.
#
# @param pair [Array] its first element is the label (colons are removed to
#   form the key) and its last element is the value
# @return [Hash] a single-entry hash mapping the colon-less label to the value
def self.to_json(pair)
  label = pair.first
  value = pair.last
  key   = label.delete(':')

  { key => value }
end

.to_record(log) ⇒ Object



30
31
32
33
34
35
36
# File 'lib/log_analysis/loading_data.rb', line 30

# Converts a labelled log line (`time:... host:... status:...`) into a hash.
#
# Tab characters - and the literal two-character sequence backslash + "t"
# that the previous implementation replaced - are turned into spaces so the
# space-prefixed labels in REGEX_KEYS can match. The line is then split on
# those labels and each label/value pair is merged into the result via
# to_json. The argument itself is not modified.
#
# @param log [String] one log line
# @return [Hash{String => String}] the values keyed by their colon-less labels
def self.to_record(log)
  normalized = log.gsub(/\t|\\t/, ' ')

  # A limit of -1 keeps trailing empty strings, so a final label with an
  # empty value still forms a complete pair.
  tokens = normalized.split(REGEX_KEYS, -1).map(&:strip)

  # Only the text before the first label is discarded when blank; empty
  # values in the middle must stay so later labels keep their own values.
  tokens.shift if tokens.first == ''

  tokens.each_slice(2).each_with_object({}) do |pair, record|
    record.merge!(to_json(pair))
  end
end