Class: Spackler::PGA

Inherits:
Object
  • Object
show all
Defined in:
lib/spackler.rb

Instance Method Summary collapse

Instance Method Details

#fetch(url, incl_missed_cut = false) ⇒ Object



175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# File 'lib/spackler.rb', line 175

# Scrapes a leaderboard page and returns its table cells as text.
#
# Every <tr> of every matching table produces one entry in the result,
# including rows that contain no <td> cells (those yield an empty array).
#
# @param url [String] location of the leaderboard page, handed to +open+
# @param incl_missed_cut [Boolean] when true, rows from the
#   'table.altleaderboard2' tables are appended after the
#   'table.altleaderboard' rows
# @return [Array<Array<String>>] one array of stripped, ASCII-converted
#   cell texts per table row
def fetch(url, incl_missed_cut = false)
  # NOTE(review): relies on the file's existing +open+ (presumably open-uri)
  # to retrieve the URL; only pass trusted URLs.
  doc = Nokogiri::HTML(open(url))

  # The CSS class selector already restricts the tables, so no additional
  # check of the class attribute is needed. The previous check compared an
  # attribute node with a String, which skipped every missed-cut table.
  selectors = ['table.altleaderboard']
  selectors << 'table.altleaderboard2' if incl_missed_cut

  player_data = []
  selectors.each do |selector|
    doc.css(selector).each do |table|
      table.css('tr').each do |row|
        player_data << row.css('td').map { |cell| cell.inner_text.strip.to_ascii_iconv }
      end
    end
  end

  player_data
end

#friendly_structure(player_data) ⇒ Object



214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/spackler.rb', line 214

# Converts raw leaderboard rows into an array of OpenStruct records.
#
# Rows that are empty, or whose first cell is "Pos", are skipped. Cells are
# mapped positionally: 0..3 become money, pos, start and name; 4..11 become
# today, thru, to_par, r1..r4 and total. fname/lname come from a Player
# built from the name cell.
#
# NOTE(review): the skip test looks for "Pos" in cell 0, yet cell 0 is stored
# as +money+ and cell 1 as +pos+ -- confirm the column order against the
# scraped table.
#
# @param player_data [Array<Array<String>>] rows of cell texts
# @return [Array<OpenStruct>] one record per retained row
def friendly_structure(player_data)
  leading_fields  = %w(money pos start name)
  trailing_fields = %w(today thru to_par r1 r2 r3 r4 total)
  offset = leading_fields.length

  records = []
  player_data.each do |cells|
    next if cells.length == 0 || cells[0] == "Pos"

    record = OpenStruct.new
    leading_fields.each_with_index do |field, idx|
      record.send("#{field}=", cells[idx])
    end

    # Name parsing is delegated to Player.
    parsed = Player.new(record.name)
    record.fname = parsed.fname
    record.lname = parsed.lname

    trailing_fields.each_with_index do |field, idx|
      record.send("#{field}=", cells[offset + idx])
    end

    records << record
  end

  records
end

#get_urls(year) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/spackler.rb', line 86

# Builds the list of leaderboard URLs known for a season.
#
# Only 2008, 2009 and 2010 have tournament ids; any other year (including
# 2007) yields an empty list.
#
# @param year [Integer] the season
# @return [Array<String>] leaderboard URLs, in the order the ids are listed
def get_urls(year)
  tournament_ids =
    case year
    when 2008
      # diff format: r476
      %w(
        r045 r060 r505 r029 r032 r028 r020 r480 r023 r034 r035 r030
        r003 r004 r483 r018 r054 r481 r012 r019 r022 r021 r025 r471
        r472 r013 r041 r047 r464 r482 r475 r010 r457 r007 r005 r027
      )
    when 2009
      %w(r016 r006 r002 r003 r004 r005 r007 r457 r473 r475 r009 r020)
    when 2010
      %w(r032 r016)
    else
      []
    end

  tournament_ids.map do |id|
    "http://www.pgatour.com/leaderboards/current/#{id}/alt-1.html"
  end
end

#tourney_info(url) ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# File 'lib/spackler.rb', line 117

# Scrapes tournament name, dates and course/location from a leaderboard page.
#
# Markup this method looks for:
#   <div class="tourTournSubName">Mayakoba Golf Classic at Riviera Maya-Cancun</div>
#   <div class="tourTournNameDates">Thursday Feb 21 - Sunday Feb 24, 2008</div>
#   <div class="tourTournHeadLinks">El Camaleon Golf Club . Playa del Carmen, Quintana Roo, Mexico</div>
# Pages without tourTournNameDates are read from a single
# <div class="tourTournSubInfo"> whose text is split on ' . ' into dates and
# course.
#
# Apostrophes are removed from the name and the course. The scraped name is
# printed to stdout.
#
# @param url [String] location of the leaderboard page, handed to +open+
# @return [OpenStruct] responds to +name+, +dates+ and +course+; a field is
#   nil when it could not be found on the page (previously this raised
#   NoMethodError)
def tourney_info(url)
  # NOTE(review): relies on the file's existing +open+ (presumably open-uri)
  # to retrieve the URL; only pass trusted URLs.
  doc = Nokogiri::HTML(open(url))

  # Names for leaderboards whose markup carries no tourTournSubName div.
  misfit_names = {
    "http://www.pgatour.com/leaderboards/current/r027/alt-1.html" => "The Barclays",
    "http://www.pgatour.com/leaderboards/current/r028/alt-1.html" => "BMW Championship",
    "http://www.pgatour.com/leaderboards/current/r060/alt-1.html" => "The Tour Championship",
    "http://www.pgatour.com/leaderboards/current/r505/alt-1.html" => "Deutsche Bank Championship",
    "http://www.pgatour.com/leaderboards/current/r473/alt-1.html" => "ca Championship"
  }

  # Cleaned text of the first node matching +selector+, or nil if none.
  text_of = lambda do |selector|
    node = doc.css(selector).first
    node && node.inner_text.strip.to_ascii_iconv
  end

  tourn = OpenStruct.new
  tourn.name = text_of.call('div.tourTournSubName') || misfit_names[url]

  dates = text_of.call('div.tourTournNameDates')
  if dates
    tourn.dates = dates
    tourn.course = text_of.call('div.tourTournHeadLinks')
  else
    # Some leaderboards use a different format: "<dates> . <course>".
    parts = text_of.call('div.tourTournSubInfo').to_s.split(' . ')
    tourn.dates = parts[0]
    tourn.course = parts[1]
  end

  # Strip apostrophes, tolerating fields that could not be scraped.
  tourn.name = tourn.name.delete("'") if tourn.name
  tourn.course = tourn.course.delete("'") if tourn.course
  puts "scraped Tourney Name: #{tourn.name}"

  tourn
end