Module: Croque::Aggregator
- Defined in:
- lib/croque/aggregator.rb
Class Method Summary collapse
- .aggregate(date) ⇒ Object
- .aggregate_per_hour(date) ⇒ Object
- .all ⇒ Object
- .generate_ranking(date) ⇒ Object
Class Method Details
.aggregate(date) ⇒ Object
4 5 6 7 8 9 10 11 |
# File 'lib/croque/aggregator.rb', line 4
# Runs the full aggregation pipeline for one day.
#
# The three steps run in a fixed order: previously generated files for
# +date+ are removed, the per-hour CSV files are rebuilt, and finally the
# ranking is generated from them.
#
# @param date the day to aggregate (handed unchanged to every step)
# @return the result of generate_ranking(date)
def aggregate(date)
  # Clear earlier output for this date first.
  remove_files(date)

  # Rebuild the hourly CSV files.
  aggregate_per_hour(date)

  # Build the ranking from the hourly files.
  generate_ranking(date)
end
.aggregate_per_hour(date) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# File 'lib/croque/aggregator.rb', line 13
# Scans every log file and writes one CSV per hour for the given date.
#
# For each file in +log_files+ that is not +skippable?+, collects the lines
# matching +date_matcher(date)+ and calls +create_csv+ once per entry of
# +hours+ with those lines.
#
# The file is streamed once with File.foreach. Reading it in
# `head | tail` chunks returned overlapping chunks (and therefore duplicated
# lines) whenever the line count was not a multiple of the chunk size,
# dropped an unterminated last line, and interpolated the path into a shell
# command.
#
# @param date the day whose log lines are extracted
# @return the value of log_files (result of +each+)
def aggregate_per_hour(date)
  log_files.each do |file|
    next if skippable?(date, file)

    # Build the matcher once per file instead of once per line.
    # NOTE(review): assumes date_matcher is deterministic for a given date.
    matcher = date_matcher(date)

    # Stream the file line by line; each line is seen exactly once.
    lines = File.foreach(file).select { |line| line.match(matcher) }

    hours.each do |hour|
      # create csv file
      create_csv(date, hour, lines)
    end
  end
end
.all ⇒ Object
67 68 69 70 71 72 73 74 75 |
# File 'lib/croque/aggregator.rb', line 67
# Lists the dates for which an entry exists directly under +store_path+.
#
# Only entries whose own name contains a YYYY-MM-DD pattern are considered.
# The pattern is tested against the basename (the same string that is parsed)
# so a date-like parent directory cannot let unrelated entries through to
# Date.parse, which would raise on them.
#
# @return [Array<Date>] one Date per matching entry, in Dir.glob order
def all
  names = Dir.glob(store_path + '*').map { |path| File.basename(path) }
  names.grep(/\d{4}\-\d{2}\-\d{2}/).map { |name| Date.parse(name) }
end
.generate_ranking(date) ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
# File 'lib/croque/aggregator.rb', line 41
# Builds the ranking CSV for +date+ from the per-hour CSV files.
#
# For every entry of +hours+ whose file at csv_path(date, hour) exists, each
# row is read as [uuid, processing_time]; rows for which +low?+ is true are
# dropped and the rest are collected as [date, hour, uuid, processing_time].
# The collected rows are sorted by processing time, descending, rendered as
# CSV using +csv_option+, and handed to store_csv with ranking_path(date).
#
# @param date the day to rank (passed to csv_path and ranking_path)
# @return whatever store_csv returns
def generate_ranking(date)
  rows = []
  hours.each do |hour|
    path = csv_path(date, hour)
    # next if no file
    next unless File.exist?(path)

    # File.read closes the handle itself; carriage returns are stripped
    # before parsing.
    csv_data = File.read(path).delete("\r")
    CSV.parse(csv_data).each do |uuid, raw_time|
      processing_time = raw_time.to_f
      # skip rows the low? helper classifies as too fast to rank
      next if low?(processing_time)
      rows << [date, hour, uuid, processing_time]
    end
  end

  # Processing time, descending
  rows = rows.sort { |a, b| b[3] <=> a[3] }

  # Options are splatted as keywords: CSV.generate no longer accepts a
  # positional options hash on Ruby 3 / csv 3.x.
  # NOTE(review): assumes csv_option is a Hash with symbol keys — confirm.
  data = CSV.generate("", **csv_option) do |csv|
    rows.each { |row| csv << row }
  end
  store_csv(ranking_path(date), data)
end