Class: HikingGuide::Scraper
- Inherits:
-
Object
- Object
- HikingGuide::Scraper
- Defined in:
- lib/hiking_guide/scraper.rb
Overview
require ‘open-uri’
Class Method Summary collapse
- .clean_description(page) ⇒ Object
- .scrape_trail_list(list_url) ⇒ Object
- .scrape_trail_profile(profile_url) ⇒ Object
Class Method Details
.clean_description(page) ⇒ Object
49 50 51 52 53 54 55 56 57 |
# File 'lib/hiking_guide/scraper.rb', line 49 def self.clean_description(page) paragraphs = [] page.css("p").each do |paragraph| paragraphs << paragraph.text end paragraphs.delete_if {|p| p.include? ("Printable Topo Hike Map")} paragraphs.delete_if {|p| p.include? ("(<!--")} paragraphs end |
.scrape_trail_list(list_url) ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
# File 'lib/hiking_guide/scraper.rb', line 5 def self.scrape_trail_list(list_url) page = Nokogiri::HTML(open(list_url)) state_trails = { :md => page.css("#CollapsiblePanelOMH a"), :nc => page.css("#CollapsiblePanelONCH a"), :pa => page.css("#CollapsiblePanelOPH a"), :va => page.css("#CollapsiblePanelOVH a"), :wv => page.css("#CollapsiblePanelOWVH a"), } trail_array = [] state_trails.each do |state, trails| trails.each do |trail| trail_array << { :name=> trail.children.text, :profile_url=> HikingGuide::CommandLineInteface::BASE_PATH+trail.attributes["href"].value, :state=> "#{state}".upcase } end end trail_array end |
.scrape_trail_profile(profile_url) ⇒ Object
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/hiking_guide/scraper.rb', line 27 def self.scrape_trail_profile(profile_url) page = Nokogiri::HTML(open(profile_url)) trail = {} reference = page.css("td").find_index {|td| td.text == "Camping"} page.css("td")[reference+1] != nil ? trail[:length] = page.css("td")[reference+1].text.split.join(" ") : "unknown" page.css("td")[reference+8].children[0] != nil ? trail[:hiking_time] = page.css("td")[reference+8].children[0].text.split.join(" ") : "unknown" page.css("td")[reference+8].children[2] != nil ? trail[:elevation_gain] = page.css("td")[reference+8].children[2].text.split.join(" ") : "unknown" page.css("p") != nil ? trail[:description] = clean_description(page).join(" ").split.join(" ")[0..750].gsub(/\s\w+\s*$/, '...') : "unknown" trail[:map_pdf_link] = profile_url+("images/Map.pdf") trail end |