Class: TransportScrapper

Inherits:
Object
  • Object
show all
Defined in:
lib/TransportScrapper.rb

Class Method Summary collapse

Class Method Details

.scrape(value) ⇒ Object



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/TransportScrapper.rb', line 114

# Scrapes outward and return journeys for one search URL across the day.
#
# Starts a headless Chrome session through Watir, loads +value+ once per
# time window in the fixed list below (appending +returntime+ and
# +departtime+ query parameters), and lets scrapewithtime accumulate rows in
# @outwardArr and @returnArr. The browser is closed before returning, even
# when a page load or parse raises, so repeated calls do not leak Chrome
# processes.
#
# @param value [String] base search URL; the time parameters are appended
#   with a leading "&", so it is expected to already carry a query string
# @return [Hash] {"outward" => Array<Hash>, "return" => Array<Hash>}
def self.scrape(value)
  @outwardArr = []
  @returnArr = []

  # The original list passed a bare "window-size=1600,1200", the only entry
  # without the "--" switch prefix; it is written as a switch here.
  args = %w[--disable-infobars --headless --window-size=1600,1200 --no-sandbox --disable-gpu --disable-dev-shm-usage]
  options = {
    binary: ENV['GOOGLE_CHROME_BIN'],
    prefs: { password_manager_enable: false, credentials_enable_service: false },
    args: args
  }

  @browser = Watir::Browser.new(:chrome, options: options)

  begin
    %w[0000 0400 0800 1200 1600 2000].each do |time|
      self.scrapewithtime(value + "&returntime=#{time}&departtime=#{time}", time)
    end
  ensure
    # Always release the browser session, including on failure.
    @browser.close
  end

  { "outward" => @outwardArr, "return" => @returnArr }
end

.scrapewithtime(value, time) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/TransportScrapper.rb', line 6

# Loads one search-result page and appends its journeys to the accumulators.
#
# Navigates @browser to +value+, parses the rendered HTML with Nokogiri, then
# pushes every row found under div#JourneyRowsOut into @outwardArr and every
# row under div#JourneyRowsRet into @returnArr, skipping rows that are
# already present. Finally prints both accumulators to stdout.
#
# Expects @browser, @outwardArr and @returnArr to have been set by the caller.
#
# @param value [String] URL to load
# @param time [String] time-window label passed by the caller; not read here
# @return [nil] the value of the final puts
def self.scrapewithtime(value, time)
  @browser.goto value
  doc = Nokogiri::HTML.parse(@browser.html)

  collect_journey_rows(doc, 'div#JourneyRowsOut', @outwardArr)
  collect_journey_rows(doc, 'div#JourneyRowsRet', @returnArr)

  puts '----------------------------------------------------------------------------'
  puts @outwardArr
  puts @returnArr
  puts '----------------------------------------------------------------------------'
end

# Internal: appends each journey row found under +container_selector+ to
# +bucket+, skipping rows whose hash is already in the bucket.
def self.collect_journey_rows(doc, container_selector, bucket)
  rows = doc.search(container_selector).css('div.fullTable').css('div.tableDataRow.Divtrue')
  rows.each do |row|
    journey = journey_from_row(row)
    bucket.push(journey) unless bucket.include?(journey)
  end
end

# Internal: builds the result hash for a single journey row.
#
# Uses String#strip rather than strip!: strip! returns nil when there is
# nothing to strip, which used to blank out values that were already trimmed.
def self.journey_from_row(row)
  depart = row.css('div.tableDataTime.d-T').text.strip
  arrival = arrival_from_times(row.css('div.tableDataTime').text.strip)
  # Drop every space and newline inside the route text.
  route = row.css('div.tableRowSelector').css('div.tableData').text.strip.delete(" \n")
  # Remove the first euro sign from the fare, as slice! did.
  price = row.css('div.tableDataFare').css('span.tablePrice').text.sub("\u20AC", '')

  {
    "depart" => depart,
    "arrival" => arrival,
    "price" => price,
    "route" => route,
    "id" => "#{depart}_#{arrival}_#{route}"
  }
end

# Internal: extracts the arrival time from the combined text of all
# div.tableDataTime cells in a row.
#
# The text is split on ":"; the result is the last two characters of the
# second piece joined with the first two characters of the third (for
# example "10:3012:45" yields "12:45"). When the text has fewer than three
# pieces it is returned unchanged instead of raising.
def self.arrival_from_times(times_text)
  parts = times_text.split(':')
  return times_text if parts.length < 3

  "#{parts[1].chars.last(2).join}:#{parts[2][0, 2]}"
end