Class: Spackler::PGA
- Inherits:
-
Object
- Object
- Spackler::PGA
- Defined in:
- lib/spackler.rb
Instance Method Summary
- #fetch(url, incl_missed_cut = false) ⇒ Object
- #friendly_structure(player_data) ⇒ Object
- #get_urls(year) ⇒ Object
- #tourney_info(url) ⇒ Object
Instance Method Details
#fetch(url, incl_missed_cut = false) ⇒ Object
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
# File 'lib/spackler.rb', line 175

# Downloads a leaderboard page and returns the text of its table cells,
# one array per table row.
#
# Every <tr> inside each matching table contributes one array holding the
# stripped text of its <td> cells (passed through String#to_ascii_iconv,
# which is defined elsewhere in the project). Rows that contain no <td>
# cells contribute an empty array.
#
# @param url [String] address of the leaderboard page
# @param incl_missed_cut [Boolean] when true, rows of 'table.altleaderboard2'
#   are appended after the rows of 'table.altleaderboard' (made cut)
# @return [Array<Array>] row arrays in document order, made-cut tables first
def fetch(url, incl_missed_cut=false)
  # NOTE(review): passing a URL to Kernel#open relies on open-uri; Ruby 3.0+
  # requires URI.open instead -- confirm the Ruby versions this must support.
  doc = Nokogiri::HTML(open(url))

  selectors = ['table.altleaderboard'] # made cut
  selectors << 'table.altleaderboard2' if incl_missed_cut

  player_data = []
  selectors.each do |selector|
    # The class selector already restricts the match. The former extra guard
    # `table.attributes['class'] == 'altleaderboard2'` compared a
    # Nokogiri::XML::Attr node with a String, which is never equal, so the
    # missed-cut rows were silently dropped.
    doc.css(selector).each do |table|
      table.css('tr').each do |row|
        player_data << row.css('td').map { |cell| cell.inner_text.strip.to_ascii_iconv }
      end
    end
  end
  player_data
end
#friendly_structure(player_data) ⇒ Object
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 |
# File 'lib/spackler.rb', line 214

# Converts raw leaderboard rows into an array of OpenStructs.
#
# Rows that are empty or whose first cell is "Pos" are skipped. For every
# other row, cells 0..11 are copied, in order, to the attributes money, pos,
# start, name, today, thru, to_par, r1, r2, r3, r4 and total. fname and
# lname are taken from a Player (defined elsewhere) built from the name cell.
#
# @param player_data [Array<Array>] rows of cell values
# @return [Array<OpenStruct>] one struct per retained row, in input order
def friendly_structure(player_data)
  roster = []

  player_data.each do |cells|
    # skip blank rows and the header row
    next if cells.length == 0 || cells[0] == "Pos"

    money, pos, start, name, today, thru, to_par, r1, r2, r3, r4, total =
      cells.values_at(*(0..11))
    golfer = Player.new(name)

    entry = OpenStruct.new
    [
      [:money, money], [:pos, pos], [:start, start], [:name, name],
      [:fname, golfer.fname], [:lname, golfer.lname],
      [:today, today], [:thru, thru], [:to_par, to_par],
      [:r1, r1], [:r2, r2], [:r3, r3], [:r4, r4],
      [:total, total]
    ].each { |field, value| entry.send("#{field}=", value) }

    roster << entry
  end

  roster
end
#get_urls(year) ⇒ Object
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
# File 'lib/spackler.rb', line 86

# Returns the leaderboard URLs known for a given season.
#
# Each tournament code (e.g. "r045") is expanded into
# "http://www.pgatour.com/leaderboards/current/<code>/alt-1.html".
#
# @param year [Integer] season; only 2008, 2009 and 2010 have entries
# @return [Array<String>] URLs in listed order; empty for any other year
#   (including 2007)
def get_urls(year)
  codes = case year
          when 2008
            # diff format: r476
            %w(
              r045 r060 r505 r029 r032 r028 r020 r480 r023 r034
              r035 r030 r003 r004 r483 r018 r054 r481 r012 r019
              r022 r021 r025 r471 r472 r013 r041 r047 r464 r482
              r475 r010 r457 r007 r005 r027
            )
          when 2009
            %w( r016 r006 r002 r003 r004 r005 r007 r457 r473 r475 r009 r020 )
          when 2010
            %w( r032 r016 )
          else
            []
          end

  # single place where the URL template lives
  codes.map { |t| "http://www.pgatour.com/leaderboards/current/#{t}/alt-1.html" }
end
#tourney_info(url) ⇒ Object
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
# File 'lib/spackler.rb', line 117

# Scrapes the tournament name, dates and course from a leaderboard page.
#
# The name is read from 'div.tourTournSubName'; when that div is absent it is
# looked up by URL in a fixed table of known leaderboards. Dates and course
# come from 'div.tourTournNameDates' and 'div.tourTournHeadLinks', or, when
# the dates div is absent, from the two halves of 'div.tourTournSubInfo'.
# Apostrophes are removed from the name and the course. The scraped name is
# printed to stdout.
#
# Any value that cannot be found is left nil instead of raising
# NoMethodError on nil.
#
# @param url [String] address of the leaderboard page
# @return [OpenStruct] with attributes name, dates and course
def tourney_info(url)
  # tournament name, dates, golf course, location
  # <div class="tourTournSubName">Mayakoba Golf Classic at Riviera Maya-Cancun</div>
  # <div class="tourTournNameDates">Thursday Feb 21 – Sunday Feb 24, 2008</div>
  # <div class="tourTournHeadLinks">El Camaleon Golf Club · Playa del Carmen, Quintana Roo, Mexico</div>
  # <div class="tourTournLogo">
  #   <img src="/.element/img/3.0/sect/tournaments/r457/tourn_logo.gif"/>
  # </div>
  doc = Nokogiri::HTML(open(url))
  tourn = OpenStruct.new

  # Cleaned text of the first node matching +selector+, or nil when none matches.
  text_of = lambda do |selector|
    node = doc.css(selector).first
    node && node.inner_text.strip.to_ascii_iconv
  end

  # names for leaderboards whose markup has no tourTournSubName div
  unscrapable_names = {
    "http://www.pgatour.com/leaderboards/current/r027/alt-1.html" => "The Barclays",
    "http://www.pgatour.com/leaderboards/current/r028/alt-1.html" => "BMW Championship",
    "http://www.pgatour.com/leaderboards/current/r060/alt-1.html" => "The Tour Championship",
    "http://www.pgatour.com/leaderboards/current/r505/alt-1.html" => "Deutsche Bank Championship",
    "http://www.pgatour.com/leaderboards/current/r473/alt-1.html" => "ca Championship"
  }

  # stays nil when the name is neither in the markup nor in the table
  tourn.name = text_of.call('div.tourTournSubName') || unscrapable_names[url]

  # tourn.dates  = "March 9 - 15, 2009"
  # tourn.course = "Doral Golf Resort and Spa"
  dates = text_of.call('div.tourTournNameDates')
  if dates
    tourn.dates = dates
    tourn.course = text_of.call('div.tourTournHeadLinks')
    # tourn.img = doc.css('div.tourTournLogo').first.inner_html
  else
    # some leaderboards have a different format: "<dates> . <course>" in one div
    # NOTE(review): in the rendered source the separator of the second split
    # contained a stray line break; assumed to be an extraction artifact of
    # ' . ' -- confirm against lib/spackler.rb.
    parts = text_of.call('div.tourTournSubInfo').to_s.split(' . ')
    tourn.dates = parts[0]
    tourn.course = parts[1]
  end

  tourn.name = tourn.name.gsub(/'/, '') if tourn.name
  tourn.course = tourn.course.gsub(/'/, '') if tourn.course

  puts "scraped Tourney Name: #{tourn.name}"
  tourn
end