Module: Croque::Aggregator

Defined in:
lib/croque/aggregator.rb

Class Method Summary collapse

Class Method Details

.aggregate(date) ⇒ Object



4
5
6
7
8
9
10
11
# File 'lib/croque/aggregator.rb', line 4

# Runs the aggregation steps for +date+ in order: +remove_files+,
# +aggregate_per_hour+, then +generate_ranking+.
#
# @param date [Object] the day to aggregate; handed unchanged to each step
# @return [Object] whatever +generate_ranking+ returns
def aggregate(date)
  remove_files(date)       # step 1: remove files for this date
  aggregate_per_hour(date) # step 2: aggregate per hour
  generate_ranking(date)   # step 3: generate the ranking (its value is returned)
end

.aggregate_per_hour(date) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/croque/aggregator.rb', line 13

# Collects the log lines belonging to +date+ from every log file and hands
# them to +create_csv+ once per hour.
#
# For each path yielded by +log_files+ that +skippable?+ does not reject, the
# file is streamed line by line and the lines matching +date_matcher(date)+
# are kept in file order. +create_csv+ is then called for every entry of
# +hours+ with those lines (also when no line matched).
#
# @param date [Object] the day to aggregate; passed through to +skippable?+,
#   +date_matcher+ and +create_csv+
# @return [Object] the return value of +log_files.each+
def aggregate_per_hour(date)
  # scan each file
  log_files.each do |file|
    # check skippable
    next if skippable?(date, file)

    # The pattern does not depend on the line, so build it once per file.
    matcher = date_matcher(date)
    # Stream the file rather than shelling out to wc/head/tail: every line is
    # visited exactly once (no duplicated lines from a short last chunk, no
    # lost final line without a trailing newline) and the path never reaches
    # a shell, so names with spaces or metacharacters are safe.
    lines = File.foreach(file).select { |line| line.match(matcher) }

    hours.each do |hour|
      # create csv file
      create_csv(date, hour, lines)
    end
  end
end

.all ⇒ Object



67
68
69
70
71
72
73
74
75
# File 'lib/croque/aggregator.rb', line 67

# Lists the dates found under the store path.
#
# Every entry globbed from <tt>store_path + '*'</tt> whose path contains a
# YYYY-MM-DD pattern is kept, and its basename is parsed with +Date.parse+.
#
# @return [Array<Date>] one Date per matching entry, in glob order
def all
  dated_entries = Dir.glob(store_path + '*').grep(/\d{4}\-\d{2}\-\d{2}/)
  dated_entries.map { |entry| Date.parse(File.basename(entry)) }
end

.generate_ranking(date) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/croque/aggregator.rb', line 41

# Builds the ranking CSV for +date+ from the per-hour CSV files.
#
# For every entry of +hours+ whose +csv_path(date, hour)+ exists, each row is
# read as <tt>[uuid, processing_time]</tt>. Rows for which
# +low?(processing_time)+ is true are dropped. The remaining rows are sorted
# by processing time, largest first, serialized as
# <tt>[date, hour, uuid, processing_time]</tt> using +csv_option+ and handed
# to +store_csv+ together with +ranking_path(date)+.
#
# @param date [Object] the day to rank; passed through to +csv_path+ and
#   +ranking_path+ and written as the first column of every row
# @return [Object] whatever +store_csv+ returns
def generate_ranking(date)
  rows = []
  hours.each do |hour|
    # csv data
    path = csv_path(date, hour)
    # next if no file
    next unless File.exist?(path)

    # File.read closes the handle itself (File.open(...).read leaked it);
    # carriage returns are stripped before parsing.
    csv_data = File.read(path).gsub(/\r/, "")
    CSV.parse(csv_data).each do |line|
      uuid = line[0]
      processing_time = line[1].to_f
      # next if processing_time < config.lower_time
      next if low?(processing_time)
      rows << [date, hour, uuid, processing_time]
    end
  end
  # Processing Time Desc
  rows = rows.sort { |a, b| b[3] <=> a[3] }
  # Generate CSV. The options are splatted as keywords: CSV.generate takes
  # keyword options, and a positional Hash raises ArgumentError on Ruby 3+.
  data = CSV.generate("", **csv_option) do |csv|
    rows.each { |row| csv << row }
  end
  store_csv(ranking_path(date), data)
end