Class: Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/viaggiatreno/scraper.rb

Instance Method Summary collapse

Constructor Details

#initialize(train_number, train) ⇒ Scraper

Returns a new instance of Scraper.



12
13
14
15
16
17
18
# File 'lib/viaggiatreno/scraper.rb', line 12

# Builds a scraper bound to one train.
#
# Both page addresses are derived from the ViaggiatrenoURLs templates by
# substituting the train number for the placeholder
# RegExpMatchInfo::STR_TRAIN_NUMBER_URL_REPLACE.
#
# @param train_number [String] value substituted into the URL templates
# @param train [Object] train object that the update_* methods fill in
def initialize(train_number, train)
  placeholder = RegExpMatchInfo::STR_TRAIN_NUMBER_URL_REPLACE
  @site_info_main = ViaggiatrenoURLs::SITE_INFO_MAIN.gsub(placeholder, train_number)
  @site_info_details = ViaggiatrenoURLs::SITE_INFO_DETAILS.gsub(placeholder, train_number)
  @train = train
end

Instance Method Details

#fetch_train_delay(status) ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/viaggiatreno/scraper.rb', line 45

# Extracts the train delay from a status string.
#
# The result is negative when the second capture of REGEXP_DELAY_STR differs
# from STR_DELAY_STR, i.e. the train is ahead of schedule.
#
# @param status [String] status text to parse
# @return [Integer, nil] 0 when the status matches REGEXP_NODELAY_STR, the
#   signed delay captured by REGEXP_DELAY_STR otherwise; nil when @train has
#   not departed or when the status matches neither pattern
def fetch_train_delay(status)
  return nil if @train.state == TrainState::NOT_DEPARTED
  return 0 if status =~ RegExpMatchInfo::REGEXP_NODELAY_STR

  # Match once and reuse the result; an unrecognised status yields nil
  # (delay unknown) instead of raising NoMethodError on a nil match.
  delay_match = status.match(RegExpMatchInfo::REGEXP_DELAY_STR)
  return nil unless delay_match

  delay = delay_match[1].to_i
  delay_match[2] == RegExpMatchInfo::STR_DELAY_STR ? delay : -delay
end

#fetch_trainstop_arrival_time(xpath) ⇒ Object



84
85
86
87
88
89
90
91
92
93
# File 'lib/viaggiatreno/scraper.rb', line 84

# Reads the scheduled and actual stop times for one stop.
#
# For each time, the first result of the corresponding XPath query is passed
# through StringUtils.remove_newlines_tabs_and_spaces and converted with to_s.
#
# @param xpath [#xpath] node for a single stop (anything answering #xpath)
# @return [Hash{String => String}] the keys 'scheduled_arrival_time' and
#   'actual_arrival_time'
def fetch_trainstop_arrival_time(xpath)
  queries = {
    'scheduled_arrival_time' => XPathMatchInfo::XPATH_DETAILS_SCHEDULED_STOP_TIME,
    'actual_arrival_time' => XPathMatchInfo::XPATH_DETAILS_ACTUAL_STOP_TIME
  }
  queries.each_with_object({}) do |(key, query), times|
    first_hit = xpath.xpath(query).first
    times[key] = StringUtils.remove_newlines_tabs_and_spaces(first_hit).to_s
  end
end

#update_train ⇒ Object

Fetch and parse basic train information (status, train_name, details).



21
22
23
24
25
26
27
28
# File 'lib/viaggiatreno/scraper.rb', line 21

# Fetches the main info page and refreshes @train from it.
#
# Sets, in order: status (first XPATH_STATUS hit, cleaned through
# StringUtils), train_name (content of the first XPATH_TRAIN_NAME hit),
# state (via update_train_status) and delay (via fetch_train_delay).
#
# @return [Object] the value assigned to @train.delay
def update_train
  # Kernel#open stopped following URLs in Ruby 3.0, so go through the URI
  # object (open-uri, which the previous open(url) call already relied on).
  # The block form closes the stream once Nokogiri has read it.
  doc = URI.parse(@site_info_main).open { |page| Nokogiri::HTML(page) }
  @train.status = StringUtils.remove_newlines_tabs_and_spaces(
    doc.xpath(XPathMatchInfo::XPATH_STATUS).first)
  @train.train_name = doc.xpath(XPathMatchInfo::XPATH_TRAIN_NAME).first.content
  # State must be resolved before the delay: fetch_train_delay checks
  # @train.state, and update_train_status may also trim @train.status.
  update_train_status(@train)
  @train.delay = fetch_train_delay(@train.status)
end

#update_train_details ⇒ Object

fetch and parse train details (departing and arriving station, intermediate stops)



60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/viaggiatreno/scraper.rb', line 60

# Fetches the details page and adds one TrainStop to @train for every node
# matched by XPATH_DETAILS_GENERIC.
#
# Each stop is built from the station name, the scheduled and actual times
# returned by fetch_trainstop_arrival_time, and the state returned by
# update_trainstop_status.
#
# @return [Object] the collection that was iterated
def update_train_details
  # Kernel#open stopped following URLs in Ruby 3.0, so go through the URI
  # object (open-uri, which the previous open(url) call already relied on).
  # The block form closes the stream once Nokogiri has read it.
  doc = URI.parse(@site_info_details).open { |page| Nokogiri::HTML(page) }
  doc.xpath(XPathMatchInfo::XPATH_DETAILS_GENERIC).each do |x|
    # NOTE(review): these instance variables look like per-stop temporaries;
    # confirm nothing else reads them before turning them into locals.
    @station_name = x.xpath(XPathMatchInfo::XPATH_DETAILS_STATION_NAME).first.to_s
    arrival_time = fetch_trainstop_arrival_time(x)
    @scheduled_arrival_time = arrival_time['scheduled_arrival_time']
    @actual_arrival_time = arrival_time['actual_arrival_time']
    @status = update_trainstop_status(x, @train, @status)
    @train.add_stop(TrainStop.new(
                      @station_name, @scheduled_arrival_time,
                      @actual_arrival_time, @status))
  end
end

#update_train_status(train) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/viaggiatreno/scraper.rb', line 30

# Derives train.state from train.status.
#
# The patterns are tried in order: not departed, arrived, traveling. On a
# traveling match, train.last_update receives the stripped third capture and
# train.status is replaced by the right-stripped first capture. When no
# pattern matches, the train is left untouched.
#
# @param train [Object] object exposing status, state= and, for the
#   traveling case, last_update= and status=
def update_train_status(train)
  if train.status =~ RegExpMatchInfo::REGEXP_STATE_NOT_DEPARTED
    train.state = TrainState::NOT_DEPARTED
  elsif train.status =~ RegExpMatchInfo::REGEXP_STATE_ARRIVED
    train.state = TrainState::ARRIVED
  elsif train.status =~ RegExpMatchInfo::REGEXP_STATE_TRAVELING
    train.state = TrainState::TRAVELING
    pieces = train.status.match(RegExpMatchInfo::REGEXP_STATE_TRAVELING)
    train.last_update = pieces[3].strip
    train.status = pieces[1].rstrip
  end
end

#update_trainstop_status(x, train, status) ⇒ Object



74
75
76
77
78
79
80
81
82
# File 'lib/viaggiatreno/scraper.rb', line 74

# Decides whether a stop has already been made.
#
# A stop counts as DONE only when the node's 'class' attribute matches
# REGEXP_STOP_ALREADY_DONE and the train is not in the NOT_DEPARTED state.
#
# @param x [#attributes] stop node whose 'class' attribute is inspected
# @param train [#state] train the stop belongs to
# @param status [Object] not read; kept for interface compatibility
# @return [Object] TrainStopState::DONE or TrainStopState::TO_BE_DONE
def update_trainstop_status(x, train, status)
  css_class = x.attributes['class'].to_s
  return TrainStopState::TO_BE_DONE unless css_class =~ RegExpMatchInfo::REGEXP_STOP_ALREADY_DONE
  return TrainStopState::TO_BE_DONE if train.state == TrainState::NOT_DEPARTED

  TrainStopState::DONE
end