Class: Scraper
- Inherits:
-
Object
- Object
- Scraper
- Defined in:
- lib/viaggiatreno/scraper.rb
Instance Method Summary collapse
- #fetch_train_delay(status) ⇒ Object
- #fetch_trainstop_arrival_time(xpath) ⇒ Object
-
#initialize(train_number, train) ⇒ Scraper
constructor
A new instance of Scraper.
-
#update_train ⇒ Object
fetch and parse basic train information (status, train name, details).
-
#update_train_details ⇒ Object
fetch and parse train details (departing and arriving station, intermediate stops).
- #update_train_status(train) ⇒ Object
- #update_trainstop_status(x, train, status) ⇒ Object
Constructor Details
#initialize(train_number, train) ⇒ Scraper
Returns a new instance of Scraper.
12 13 14 15 16 17 18 |
# File 'lib/viaggiatreno/scraper.rb', line 12
# Builds the two page URLs for a train and remembers the train object that
# later update calls will fill in.
#
# @param train_number [#to_s] train number substituted into both URL
#   templates; it is converted with #to_s so Integer input works as well as
#   String input (String#gsub rejects a non-String replacement)
# @param train [Object] the train instance updated by the other methods
def initialize(train_number, train)
  placeholder = RegExpMatchInfo::STR_TRAIN_NUMBER_URL_REPLACE
  number = train_number.to_s
  @site_info_main = ViaggiatrenoURLs::SITE_INFO_MAIN.gsub(placeholder, number)
  @site_info_details = ViaggiatrenoURLs::SITE_INFO_DETAILS.gsub(placeholder, number)
  @train = train
end
Instance Method Details
#fetch_train_delay(status) ⇒ Object
45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/viaggiatreno/scraper.rb', line 45
# Extracts the delay from a status string.
#
# @param status [String] status text to inspect
# @return [Integer, nil] 0 when the status matches REGEXP_NODELAY_STR;
#   otherwise the number captured by REGEXP_DELAY_STR, negated when the
#   second capture differs from STR_DELAY_STR (train is ahead of schedule,
#   delay is negative); nil when the train has not departed or when the
#   status matches neither pattern
def fetch_train_delay(status)
  return nil if @train.state == TrainState::NOT_DEPARTED
  return 0 if status =~ RegExpMatchInfo::REGEXP_NODELAY_STR

  # Match once and reuse the result; a status that carries no delay
  # information yields nil instead of failing on a nil MatchData.
  delay_match = status.match(RegExpMatchInfo::REGEXP_DELAY_STR)
  return nil unless delay_match

  minutes = delay_match[1].to_i
  delay_match[2] == RegExpMatchInfo::STR_DELAY_STR ? minutes : -minutes
end
#fetch_trainstop_arrival_time(xpath) ⇒ Object
84 85 86 87 88 89 90 91 92 93 |
# File 'lib/viaggiatreno/scraper.rb', line 84
# Reads the scheduled and actual stop times from one stop node.
#
# For each of the two XPath queries, the first matching node is passed through
# StringUtils.remove_newlines_tabs_and_spaces and converted with #to_s.
#
# @param xpath [#xpath] node for a single stop (presumably a Nokogiri node;
#   confirm against the caller)
# @return [Hash] with the String keys 'scheduled_arrival_time' and
#   'actual_arrival_time'
def fetch_trainstop_arrival_time(xpath)
  cleaned_time = lambda do |query|
    StringUtils.remove_newlines_tabs_and_spaces(xpath.xpath(query).first).to_s
  end

  {
    'scheduled_arrival_time' =>
      cleaned_time.call(XPathMatchInfo::XPATH_DETAILS_SCHEDULED_STOP_TIME),
    'actual_arrival_time' =>
      cleaned_time.call(XPathMatchInfo::XPATH_DETAILS_ACTUAL_STOP_TIME)
  }
end
#update_train ⇒ Object
fetch and parse basic train information (status, train name, details)
21 22 23 24 25 26 27 28 |
# File 'lib/viaggiatreno/scraper.rb', line 21
# Fetches the main info page and refreshes the basic fields of @train:
# status, train_name, state (through update_train_status) and delay.
#
# @return [Integer, nil] the value assigned to @train.delay
def update_train
  # URI.open (provided by open-uri) replaces the bare Kernel#open call, which
  # no longer fetches URLs on Ruby 3.0+. The block form closes the handle once
  # the page has been parsed.
  doc = URI.open(@site_info_main) { |page| Nokogiri::HTML(page) }
  @train.status = StringUtils.remove_newlines_tabs_and_spaces(
    doc.xpath(XPathMatchInfo::XPATH_STATUS).first
  )
  @train.train_name = doc.xpath(XPathMatchInfo::XPATH_TRAIN_NAME).first.content
  # update_train_status may rewrite @train.status, so the delay is read from
  # the status only afterwards.
  update_train_status(@train)
  @train.delay = fetch_train_delay(@train.status)
end
#update_train_details ⇒ Object
fetch and parse train details (departing and arriving station, intermediate stops)
60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/viaggiatreno/scraper.rb', line 60
# Fetches the details page and adds one TrainStop to @train for every node
# matched by XPATH_DETAILS_GENERIC.
#
# Each stop is built from the station name, the scheduled and actual arrival
# times, and the stop status computed by update_trainstop_status.
#
# @return [Object] the collection that was iterated (result of #each)
def update_train_details
  # URI.open (provided by open-uri) replaces the bare Kernel#open call, which
  # no longer fetches URLs on Ruby 3.0+. The block form closes the handle once
  # the page has been parsed.
  doc = URI.open(@site_info_details) { |page| Nokogiri::HTML(page) }
  doc.xpath(XPathMatchInfo::XPATH_DETAILS_GENERIC).each do |x|
    # The per-stop values are kept in instance variables, as before, in case
    # other code reads them.
    @station_name = x.xpath(XPathMatchInfo::XPATH_DETAILS_STATION_NAME).first.to_s
    arrival_time = fetch_trainstop_arrival_time(x)
    @scheduled_arrival_time = arrival_time['scheduled_arrival_time']
    @actual_arrival_time = arrival_time['actual_arrival_time']
    @status = update_trainstop_status(x, @train, @status)
    @train.add_stop(
      TrainStop.new(@station_name, @scheduled_arrival_time, @actual_arrival_time, @status)
    )
  end
end
#update_train_status(train) ⇒ Object
30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/viaggiatreno/scraper.rb', line 30
# Sets train.state from the train's status text.
#
# The status is tested against the NOT_DEPARTED, ARRIVED and TRAVELING
# patterns, in that order, and the first match decides the state. For a
# traveling train the third capture (stripped) is stored in train.last_update
# and the status is replaced by the first capture (right-stripped). When no
# pattern matches, the train is left untouched.
#
# @param train [Object] object exposing status, state and last_update
def update_train_status(train)
  current = train.status
  if current =~ RegExpMatchInfo::REGEXP_STATE_NOT_DEPARTED
    train.state = TrainState::NOT_DEPARTED
  elsif current =~ RegExpMatchInfo::REGEXP_STATE_ARRIVED
    train.state = TrainState::ARRIVED
  elsif current =~ RegExpMatchInfo::REGEXP_STATE_TRAVELING
    train.state = TrainState::TRAVELING
    captures = current.match(RegExpMatchInfo::REGEXP_STATE_TRAVELING)
    train.last_update = captures[3].strip
    train.status = captures[1].rstrip
  end
end
#update_trainstop_status(x, train, status) ⇒ Object
74 75 76 77 78 79 80 81 82 |
# File 'lib/viaggiatreno/scraper.rb', line 74
# Decides whether a stop has already been made.
#
# A stop is DONE only when the node's 'class' attribute matches
# REGEXP_STOP_ALREADY_DONE and the train's state is not NOT_DEPARTED; in every
# other case it is TO_BE_DONE.
#
# @param x [#attributes] node for the stop
# @param train [#state] train the stop belongs to
# @param status [Object] accepted for interface compatibility; its incoming
#   value is not used
# @return [Object] TrainStopState::DONE or TrainStopState::TO_BE_DONE
def update_trainstop_status(x, train, status)
  css_class = x.attributes['class'].to_s
  stop_done = css_class =~ RegExpMatchInfo::REGEXP_STOP_ALREADY_DONE &&
              train.state != TrainState::NOT_DEPARTED
  stop_done ? TrainStopState::DONE : TrainStopState::TO_BE_DONE
end