Class: ClosestWeightliftingGem::Scraper
- Inherits:
-
Object
- Object
- ClosestWeightliftingGem::Scraper
- Defined in:
- lib/closest_weightlifting_gem/scraper.rb
Constant Summary collapse
- BASE_URL =
"https://webpoint.usaweightlifting.org/wp15/Companies/"
Class Method Summary collapse
- .get_state_abbreviations(index) ⇒ Object
- .scrape_attributes(gym) ⇒ Object
- .scrape_gym_page(gym_row) ⇒ Object
- .scrape_main ⇒ Object
- .scrape_state_page(state) ⇒ Object
Class Method Details
.get_state_abbreviations(index) ⇒ Object
69 70 71 |
# File 'lib/closest_weightlifting_gem/scraper.rb', line 69
#
# Collects the "value" attribute of every child node found under the
# <select> element(s) of the index page, skipping the first two children.
#
# NOTE(review): the first two children are presumably placeholder entries
# (or whitespace nodes) rather than real states -- confirm against the
# live markup.
#
# @param index [#search] the parsed index page (scrape_main passes the
#   result of Nokogiri::HTML)
# @return [Array] the collected values from the third child onward; an
#   empty array when there are two or fewer children
def self.get_state_abbreviations(index)
  # drop(2) instead of [2..-1]: slicing past the end of a short array
  # returns nil, which would make the caller's `each` raise NoMethodError.
  index.search("select").children.map { |child| child.attr("value") }.drop(2)
end
.scrape_attributes(gym) ⇒ Object
58 59 60 61 62 63 64 65 66 67 |
# File 'lib/closest_weightlifting_gem/scraper.rb', line 58
#
# Fetches the detail page for an existing gym and hands the phone,
# director, coach and website found there to gym.add_attributes.
#
# @param gym [#usaw_url, #add_attributes] the gym to enrich; usaw_url is
#   appended to BASE_URL to build the page address
# @return [Object] whatever gym.add_attributes returns
def self.scrape_attributes(gym)
  # URI.open rather than Kernel#open: open-uri stopped patching
  # Kernel#open for URLs in Ruby 3.0 (deprecated since 2.7).
  gym_doc = Nokogiri::HTML(URI.open(BASE_URL + gym.usaw_url))

  # The website is the text after the first "site:" up to the next
  # carriage return, minus its first character. Pages without that label
  # yield nil instead of raising NoMethodError.
  after_label = gym_doc.text.split("site:")[1]
  website_line = after_label && after_label.split("\r").first

  gym.add_attributes({
    # [1..-1] drops the first character of the last child node --
    # presumably a leading space after a label; confirm against the page.
    :phone => gym_doc.search(".fe_big_row:nth-child(4) td").children.last.to_s[1..-1],
    :director => gym_doc.search(".fe_big_row:nth-child(2) td+ td").text,
    :coach => gym_doc.search(".fe_big_row+ .fe_big_row td+ td").text,
    :website => website_line && website_line[1..-1]
  })
end
.scrape_gym_page(gym_row) ⇒ Object
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/closest_weightlifting_gem/scraper.rb', line 40
#
# Builds a Gym from a club's own detail page. The page path is taken from
# the onclick attribute of the first <a> in the given row (the part
# matching /\/V.+true/) and appended to BASE_URL.
#
# Address fields are parsed from the serialized children of the
# ".fe_vbig_row+ .fe_big_row td" cell: the street is the text before the
# first "<br>", the city is the part of the second segment before its
# first comma, the state is the second token after the last comma, and
# the zipcode is the final token of the whole string. A field that cannot
# be parsed is set to nil.
#
# @param gym_row [#search] a listing row whose first <a> carries the
#   onclick attribute
# @return [ClosestWeightliftingGem::Gym] the newly created gym
# @raise [NoMethodError] when the row has no <a> or its onclick does not
#   match the expected pattern (there is no page to fetch in that case)
def self.scrape_gym_page(gym_row)
  # Extract the path once; it is both fetched and stored on the gym.
  usaw_url = gym_row.search("a").first.attr("onclick").match(/\/V.+true/)[0]

  # URI.open rather than Kernel#open: open-uri stopped patching
  # Kernel#open for URLs in Ruby 3.0 (deprecated since 2.7).
  gym_doc = Nokogiri::HTML(URI.open(BASE_URL + usaw_url))

  # Serialize the address cell once instead of re-querying it per field.
  address = gym_doc.search(".fe_vbig_row+ .fe_big_row td").children.to_s
  city_line = address.split("<br>")[1]

  # Text after the first "site:" up to the next carriage return; nil when
  # the page has no such label.
  after_label = gym_doc.text.split("site:")[1]
  website_line = after_label && after_label.split("\r").first

  ClosestWeightliftingGem::Gym.new({
    :name => gym_doc.search(".fe_vbig_row td").text.titleize,
    :street => address.split("<br>")[0],
    :city => city_line && city_line.split(",")[0],
    # to_s guards the case where the address is empty and `last` is nil.
    :state => address.split(",").last.to_s.split(/\W+/)[1],
    :zipcode => address.split(/\W/).last,
    # [1..-1] drops the first character of the last child node --
    # presumably a leading space after a label; confirm against the page.
    :phone => gym_doc.search(".fe_big_row:nth-child(4) td").children.last.to_s[1..-1],
    :director => gym_doc.search(".fe_big_row:nth-child(2) td+ td").text,
    :coach => gym_doc.search(".fe_big_row+ .fe_big_row td+ td").text,
    :website => website_line && website_line[1..-1],
    :usaw_url => usaw_url
  })
end
.scrape_main ⇒ Object
5 6 7 8 9 10 11 12 |
# File 'lib/closest_weightlifting_gem/scraper.rb', line 5
#
# Entry point of the scrape: downloads the club index page, reads the
# state values out of it with get_state_abbreviations, and calls
# scrape_state_page once per value. Progress messages go to standard
# output.
#
# @return [nil] the result of the final puts
def self.scrape_main
  puts "Fetching index..."

  # URI.open rather than Kernel#open: open-uri stopped patching
  # Kernel#open for URLs in Ruby 3.0 (deprecated since 2.7).
  # BASE_URL already ends in "/", so the request path contains "//";
  # the address is kept exactly as it was.
  index = Nokogiri::HTML(URI.open("#{BASE_URL}/Clubs.wp?frm=t"))

  get_state_abbreviations(index).each { |state| scrape_state_page(state) }

  puts "\n\nSorry that took so long."
end
.scrape_state_page(state) ⇒ Object
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/closest_weightlifting_gem/scraper.rb', line 14
#
# Downloads the club listing for one state and creates a Gym for every
# ".datarow" row on it.
#
# A row whose ".right+ .left" text has fewer than five
# whitespace-separated words is handed to scrape_gym_page, which reads
# the club's own page instead. Otherwise the Gym is built from the row
# itself: street, city/zipcode and phone come from children 0, 2 and 4 of
# the row's sixth child node.
#
# @param state [Object] value interpolated into the CompanyState query
#   parameter and stored as the gym's :state
# @return [Object] the collection returned by state_doc.search(".datarow")
def self.scrape_state_page(state)
  puts "Fetching gym data in #{state}..."

  # URI.open rather than Kernel#open: open-uri stopped patching
  # Kernel#open for URLs in Ruby 3.0 (deprecated since 2.7).
  state_doc = Nokogiri::HTML(URI.open("#{BASE_URL}/Clubs.wp?frm=t&CompanyState=#{state}"))

  state_doc.search(".datarow").each do |gym_row|
    if gym_row.search(".right+ .left").text.split(" ").size < 5
      scrape_gym_page(gym_row)
    else
      # Look each node up once rather than once per attribute.
      link = gym_row.search("a").first
      details = gym_row.children[5].children
      city_line = details[2].text

      ClosestWeightliftingGem::Gym.new({
        :name => link.children.text.titleize,
        :street => details[0].text,
        :city => city_line.split(",").first,
        :state => state,
        :zipcode => city_line.split(/\W+/).last,
        :phone => details[4].text,
        :usaw_url => link.attr("onclick").match(/\/V.+true/)[0]
      })
    end
  end
end