Class: AikatsuCalendar::Scraper
- Inherits:
- Object
  - Object
    - AikatsuCalendar::Scraper
- Defined in:
- lib/aikatsu_calendar/scraper.rb
Instance Attribute Summary collapse
-
#day ⇒ Object
Returns the value of attribute day.
-
#month ⇒ Object
Returns the value of attribute month.
-
#schedules ⇒ Object
Returns the value of attribute schedules.
-
#year ⇒ Object
Returns the value of attribute year.
Class Method Summary collapse
- .scrape(path = AikatsuCalendar::URL) ⇒ Object
Instance Method Summary collapse
- #class_to_type(s) ⇒ Object
- #feed(doc) ⇒ Object
- #feed_item(p) ⇒ Object
- #feed_row(tr) ⇒ Object
- #feed_table(table) ⇒ Object
-
#initialize ⇒ Scraper
constructor
A new instance of Scraper.
- #parse_item(p) ⇒ Object
- #to_json(pretty = false) ⇒ Object
Constructor Details
#initialize ⇒ Scraper
Returns a new instance of Scraper.
22 23 24 |
# File 'lib/aikatsu_calendar/scraper.rb', line 22
# Creates a scraper with an empty list of collected schedule entries.
#
# @return [Scraper] a new instance of Scraper
def initialize
  @schedules = []
end
Instance Attribute Details
#day ⇒ Object
Returns the value of attribute day.
11 12 13 |
# File 'lib/aikatsu_calendar/scraper.rb', line 11
# Reader for the day attribute.
#
# @return [Object] the current value of @day
def day
  @day
end
#month ⇒ Object
Returns the value of attribute month.
11 12 13 |
# File 'lib/aikatsu_calendar/scraper.rb', line 11
# Reader for the month attribute.
#
# @return [Object] the current value of @month
def month
  @month
end
#schedules ⇒ Object
Returns the value of attribute schedules.
11 12 13 |
# File 'lib/aikatsu_calendar/scraper.rb', line 11
# Reader for the schedules attribute.
#
# @return [Object] the current value of @schedules
def schedules
  @schedules
end
#year ⇒ Object
Returns the value of attribute year.
11 12 13 |
# File 'lib/aikatsu_calendar/scraper.rb', line 11
# Reader for the year attribute.
#
# @return [Object] the current value of @year
def year
  @year
end
Class Method Details
.scrape(path = AikatsuCalendar::URL) ⇒ Object
13 14 15 16 17 18 19 20 |
# File 'lib/aikatsu_calendar/scraper.rb', line 13
# Loads the schedule page, feeds it through a fresh scraper and returns the
# collected schedule entries with duplicates removed.
#
# Two entries are treated as duplicates when their :type, :content,
# :date_from and :date_until all match; :link is not compared.
#
# @param path [String] location of the page to load (presumably a URL or a
#   local file path); defaults to AikatsuCalendar::URL
# @return [Array<Hash>] the de-duplicated schedule entries
def self.scrape(path = AikatsuCalendar::URL)
  scraper = new
  # URI.open (open-uri) replaces the bare Kernel#open call: since Ruby 3.0
  # Kernel#open no longer fetches URLs, even with open-uri loaded. Strings
  # that are not URLs still fall through to Kernel#open, so local paths keep
  # working. open-uri must be loaded, as it had to be for the old call.
  doc = URI.open(path) { |f| Nokogiri::HTML.parse(f) }
  scraper.feed(doc)
  scraper.schedules.uniq do |x|
    [x[:type], x[:content], x[:date_from], x[:date_until]]
  end
end
Instance Method Details
#class_to_type(s) ⇒ Object
94 95 96 97 98 99 100 |
# File 'lib/aikatsu_calendar/scraper.rb', line 94
# Extracts the schedule type from a CSS class string.
#
# @param s [String, nil] class attribute value, e.g. "schedule-tv"
# @return [String, nil] the word following "schedule-", or nil when the
#   pattern does not occur in s
def class_to_type(s)
  return unless s =~ /schedule-(\w+)/

  Regexp.last_match(1)
end
#feed(doc) ⇒ Object
26 27 28 29 30 31 |
# File 'lib/aikatsu_calendar/scraper.rb', line 26
# Feeds every table found inside the ".info-schedule" element of the
# document to #feed_table.
#
# @param doc [#at_css] parsed document (a Nokogiri document when called
#   from .scrape)
# @return [Object] whatever `each` returns for the table collection
def feed(doc)
  doc.at_css(".info-schedule").css('table').each { |table| feed_table(table) }
end
#feed_item(p) ⇒ Object
56 57 58 |
# File 'lib/aikatsu_calendar/scraper.rb', line 56
# Parses one schedule item and appends the result to @schedules.
#
# @param p [Object] the item node, passed straight to #parse_item
# @return [Array] @schedules, after the append
def feed_item(p)
  @schedules.push(parse_item(p))
end
#feed_row(tr) ⇒ Object
45 46 47 48 49 50 51 52 53 54 |
# File 'lib/aikatsu_calendar/scraper.rb', line 45
# Reads the day of month from the row's first <td> and feeds every <p>
# in the row to #feed_item.
#
# @param tr [#at_css, #css] a table row node
# @return [Object] whatever `each` returns for the row's <p> collection
# @raise [ValueError] when the cell text contains no "<digits>日"
def feed_row(tr)
  # Date (day of month)
  cell_text = tr.at_css('td').text
  day_match = cell_text.match(/(\d+)日/)
  # NOTE(review): ValueError is not a Ruby core class; unless the project
  # defines it, this line surfaces as a NameError instead - confirm.
  raise ValueError, cell_text unless day_match

  @day = day_match[1].to_i
  tr.css('p').each { |item| feed_item(item) }
end
#feed_table(table) ⇒ Object
33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/aikatsu_calendar/scraper.rb', line 33
# Reads the year and month from the table's first <th>, then feeds every
# row after the first one to #feed_row.
#
# @param table [#at_css, #css] a table node
# @return [Array] the rows that were fed (all rows except the first)
# @raise [ValueError] when the header text contains no "<digits>年<digits>月"
def feed_table(table)
  # Year and month
  header = table.at_css('th').text
  ym = header.match(/(\d+)年(\d+)月/)
  # NOTE(review): ValueError is not a Ruby core class; unless the project
  # defines it, this line surfaces as a NameError instead - confirm.
  raise ValueError, header unless ym

  @year = ym[1].to_i
  @month = ym[2].to_i
  # Skip the first row (presumably the header row). drop(1) yields an empty
  # list when there are no rows, whereas [1..-1] returns nil for an empty
  # collection and made the following .each crash.
  table.css('tr').drop(1).each { |tr| feed_row(tr) }
end
#parse_item(p) ⇒ Object
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/aikatsu_calendar/scraper.rb', line 60
# Converts one schedule item node into a hash.
#
# When the item text contains an explicit period
# ("Y年M月D日~[Y年]M月D日") that period supplies the dates and is removed
# from the content; otherwise both dates are the scraper's current
# @year/@month/@day.
#
# @param p [#text, #at_css, #attr] the item node
# @return [Hash] with keys :type, :date_from, :date_until, :content, :link
def parse_item(p)
  # Date range; the end year is optional and defaults to the start year.
  period_re = /(?: ※)?(\d+)年(\d+)月(\d+)日~(?:(\d+)年)?(\d+)月(\d+)日/
  raw = p.text
  period = raw.match(period_re)

  if period
    from_year, from_month, from_day = period.values_at(1, 2, 3).map(&:to_i)
    until_year = (period[4] || period[1]).to_i
    date_from = Time.local(from_year, from_month, from_day)
    date_until = Time.local(until_year, period[5].to_i, period[6].to_i)
  else
    date_from = date_until = Time.local(@year, @month, @day)
  end

  # Link, resolved against the calendar page's URL; nil without an <a href>.
  anchor = p.at_css('a[href]')
  url = anchor && URI.join('http://www.aikatsu.com/calender/', anchor.attr(:href)).to_s

  {
    type: class_to_type(p.attr(:class)),
    date_from: date_from,
    date_until: date_until,
    # Content is the item text with the date range stripped off.
    content: raw.sub(period_re, '').strip,
    link: url,
  }
end
#to_json(pretty = false) ⇒ Object
102 103 104 105 106 107 108 |
# File 'lib/aikatsu_calendar/scraper.rb', line 102
# Serialises the collected schedules to a JSON string.
#
# @param pretty [Boolean] when truthy, use JSON.pretty_generate;
#   otherwise JSON.dump
# @return [String] JSON text for @schedules
def to_json(pretty = false)
  return JSON.pretty_generate(@schedules) if pretty

  JSON.dump(@schedules)
end