Module: TottoriOpenDataCatalog::Parser

Defined in:
lib/tottori-opendata-catalog/parser.rb

Class Method Summary collapse

Class Method Details

.parse_index(string) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/tottori-opendata-catalog/parser.rb', line 9

# Parses the catalog index page and collects its category links.
#
# @param string [String] HTML of the index page; it is handed to Nokogiri
#   with the 'Shift_JIS' encoding.
# @return [Hash] +:name+ holds the stripped text of the page's <title>;
#   +:categories+ holds one +{name:, link:}+ Hash per anchor whose href
#   contains 'forweb_bunrui' (both values stripped).
def parse_index(string)
  doc = Nokogiri::HTML(string, nil, 'Shift_JIS')
  # collect
  name = doc.xpath('//title').text.strip
  categories = doc.xpath('//a').select { |a|
    # Anchors without an href attribute yield nil here; coerce to a String so
    # they are skipped instead of raising NoMethodError.
    a[:href].to_s.include?('forweb_bunrui')
  }.map { |a|
    { name: a.text.strip, link: a[:href].strip }
  }
  {
    name: name,
    categories: categories,
  }
end

.parse_list(string) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/tottori-opendata-catalog/parser.rb', line 26

# Parses a dataset list page into one Hash per table row.
#
# Every row after the first of the table with id "contentslist" is read
# positionally from the row's child nodes:
# child 0 -> :name (text) and :link (href of its first child),
# child 1 -> :formats, child 2 -> :url (href of its first child),
# child 3 -> :department, child 4 -> :division.
#
# @param string [String] HTML of the list page; it is handed to Nokogiri
#   with the 'Shift_JIS' encoding.
# @return [Array<Hash>] row Hashes; :formats is an Array of upper-cased,
#   whitespace-separated tokens.
def parse_list(string)
  rows = Nokogiri::HTML(string, nil, 'Shift_JIS')
         .xpath('//table[@id="contentslist"]/tr[position() > 1]')
  entries = rows.map do |row|
    cells = row.children
    {
      name: cells[0].text.strip,
      link: cells[0].children[0][:href],
      # Upper-case the format list and break it into individual tokens.
      formats: cells[1].text.strip.upcase.split,
      url: cells[2].children[0][:href],
      department: cells[3].text.strip,
      division: cells[4].text.strip,
    }
  end
  # HACK: one specific record is listed as Notes; report it as PDF instead.
  notes_entry = entries.find do |entry|
    entry[:link] == 'list1_forweb/10EE759AAD20B54749257C68000A2845?OpenDocument'
  end
  notes_entry[:formats] = ['PDF'] if notes_entry
  entries
end

.parse_record(string) ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/tottori-opendata-catalog/parser.rb', line 55

# Parses a single dataset record page into a Hash of its fields.
#
# Each child node of the form's div with id "all" is matched against a set of
# labelled-line patterns ("label ... : value"); the captured values are merged
# into one Hash, later matches overriding earlier ones for the same key.
#
# @param string [String] HTML of the record page; it is handed to Nokogiri
#   with the 'Shift_JIS' encoding.
# @return [Hash] may contain :tags (Array<String>), :redistribution_allowed and
#   :commercial_use_allowed (+true+ when the page shows '○', otherwise the raw
#   String), :provider, :tel, :description, :comment; always contains :year,
#   :month, :day (Integer or nil) and :repeat_rule
#   (+{frequency:, interval:, description:}+).
# @raise [RuntimeError] when an update-cycle description is present but matches
#   none of the known patterns.
def parse_record(string)
  doc = Nokogiri::HTML(string, nil, 'Shift_JIS')
  # collect
  # Seed the fold with {} so a page with no recognised line yields an empty
  # Hash instead of nil.
  item = doc.xpath('//form/div[@id="all"]').children.map { |e|
    case e.text.strip
    when /データ年次.*?:(.*)$/
      { updated_at: $1.strip }
    when /情報.*?:(.*)/
      { tags: $1.strip }
    when /再配布可否.*?:(.*)$/
      { redistribution_allowed: $1.strip }
    when /商用利用可否.*?:(.*)$/
      { commercial_use_allowed: $1.strip }
    # NOTE(review): 'テータ' may be a typo for 'データ' -- confirm against the
    # live page before changing the pattern.
    when /テータ提供.*?:(.*)$/
      { provider: $1.strip }
    when /問い合わせ先電話番号.*?:(.*)$/
      { tel: $1.strip }
    when /内容.*?:(.*)$/
      { description: $1.strip }
    when /コメント.*?:(.*)$/
      { comment: $1.strip }
    end
  }.compact.inject({}, &:merge)

  # trim
  # '○' marks "allowed"; any other value is left as the raw String.
  [:redistribution_allowed, :commercial_use_allowed].each do |key|
    item[key] = true if item[key] == '○'
  end

  # The update field has the shape "<year.month.day> <cycle description>".
  # Normalise the ideographic space (U+3000) and full-width digits
  # (U+FF10..U+FF19) to ASCII first so that split and \d work on them.
  # A missing field is treated like a blank one.
  updated_at = item.delete(:updated_at).to_s.tr("\u3000\uFF10-\uFF19", ' 0-9')
  date, repeat_rule_description = updated_at.split
  year, month, day = date.to_s.split('.')
  item[:year]  = (year  ? year.to_i  : nil)
  item[:month] = (month ? month.to_i : nil)
  item[:day]   = (day   ? day.to_i   : nil)

  # Order matters: '毎年度' must be tested before '毎年', and 'N年度毎' before
  # 'N年毎'. Intervals may have more than one digit.
  rule =
    case repeat_rule_description
    when /毎年度/
      { frequency: 'fiscal_yearly', interval: nil }
    when /毎年/
      { frequency: 'yearly', interval: nil }
    when /毎月/
      { frequency: 'monthly', interval: nil }
    when /毎週/
      { frequency: 'weekly', interval: nil }
    when /(\d+)年度毎/
      { frequency: 'fiscal_yearly', interval: $1.to_i }
    when /(\d+)年毎/
      { frequency: 'yearly', interval: $1.to_i }
    when /(\d+)月毎/, /(\d+)ヶ月毎/
      { frequency: 'monthly', interval: $1.to_i }
    when /(\d+)週毎/
      { frequency: 'weekly', interval: $1.to_i }
    when /随時/
      { frequency: 'as_needed', interval: nil }
    when /不定期/
      { frequency: 'unscheduled', interval: nil }
    else
      { frequency: nil, interval: nil }
    end
  item[:repeat_rule] = rule.merge(description: repeat_rule_description)

  # Raise when a repeat-rule description is present but no frequency could be
  # derived from it.
  if repeat_rule_description && rule[:frequency].nil?
    raise "unrecognized repeat rule description: #{repeat_rule_description.inspect}"
  end

  item[:tags] = item[:tags].split if item.key?(:tags)
  item
end