Class: APDM::Issues::ETL

Inherits:
Object
  • Object
show all
Defined in:
lib/apdm/issues/etl.rb

Defined Under Namespace

Classes: NoMatchingChannel

Class Method Summary collapse

Class Method Details

.extract(source) ⇒ Object



33
34
35
36
37
38
39
40
# File 'lib/apdm/issues/etl.rb', line 33

# Reads a semicolon-separated file and groups the values of its third column
# by the value of its first column.
#
# Every row is destructured as +url+, +name+, +date+; the +name+ column is
# ignored. Completely blank lines are skipped so they do not produce a
# +nil+ key in the result.
#
# @param source [String] path of the CSV file to read
# @return [Hash] first-column value => Array of third-column values, in file order
def extract(source)
  channels = {}
  # CSV.foreach closes the file once iteration finishes; a block-less
  # CSV.open would keep the handle open until garbage collection.
  CSV.foreach(source, :col_sep => ';', :skip_blanks => true) do |url, _name, date|
    (channels[url] ||= []) << date
  end
  channels
end

.find(url) ⇒ Object

Raises: NoMatchingChannel when no channel matches the label taken from the URL.



49
50
51
52
53
54
# File 'lib/apdm/issues/etl.rb', line 49

# Looks up the channel whose label is the second dot-separated segment of
# the given URL.
#
# @param url [String] URL to infer the channel from
# @return [Object] whatever APDM::Channel.find_by_label returns for that label
# @raise [NoMatchingChannel] when the lookup returns nothing truthy
def find(url)
  _, label = url.split('.')
  channel = APDM::Channel.find_by_label(label)
  return channel if channel

  raise NoMatchingChannel, "#{label}, who? Could not infer channel from URL #{url}."
end

.import(source) ⇒ Object



23
24
25
26
27
28
29
30
31
# File 'lib/apdm/issues/etl.rb', line 23

# Runs the whole pipeline for one source: extracts the per-URL dates,
# transforms each entry into a channel plus its issues, and saves the
# issues under the channel's label.
#
# @param source [Object] passed straight through to extract
# @return [Object] the collection returned by extract
def import(source)
  extract(source).each do |url, dates|
    channel, issues = transform(url, dates)
    save(channel.label, issues)
  end
end

.save(label, issues) ⇒ Object



56
57
58
59
60
61
62
63
# File 'lib/apdm/issues/etl.rb', line 56

# Prints a one-line summary and writes the issues as a Ruby source snippet
# that assigns an array of Date.parse calls to APDM::ISSUES[label].
#
# The destination comes from path(label), which is defined outside this
# method. The file is opened in 'w' mode, so existing content is replaced.
#
# @param label [Object] channel label, interpolated into the summary and the snippet
# @param issues [Enumerable] dates; each one is interpolated into a Date.parse('...') line
# @return [Object] result of the final write inside the File.open block
def save(label, issues)
  puts "#{label}: #{issues.count} issues"
  File.open(path(label), 'w') do |file|
    file << "APDM::ISSUES['#{label}'] = [\n"
    file << issues.map { |issue| "  Date.parse('#{issue}')" }.join(",\n")
    file.write("\n]\n")
  end
end

.transform(url, dates) ⇒ Object



42
43
44
45
46
47
# File 'lib/apdm/issues/etl.rb', line 42

# Resolves the channel for a URL and parses its date strings, keeping only
# dates whose year is not earlier than current_year (defined outside this
# method).
#
# @param url [String] URL handed to find
# @param dates [Array<String>] date strings accepted by Date.parse
# @return [Array] two elements: the result of find(url) and the Array of kept Date objects
def transform(url, dates)
  channel = find(url)
  parsed = dates.map { |raw| Date.parse(raw) }
  kept = parsed.select { |day| day.year >= current_year }
  [channel, kept]
end