Class: NbaDraft2017::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/nba_draft_2017/scraper.rb

Class Method Summary collapse

Class Method Details

.get_age(dob) ⇒ Object



69
70
71
72
73
74
75
# File 'lib/nba_draft_2017/scraper.rb', line 69

# Computes a current age in whole years from a textual date of birth.
#
# @param dob [String] date of birth as "<Month name> <day>, <year>",
#   e.g. "June 22, 1998"; the comma after the day is optional
# @return [Integer] number of completed years as of Date.today
# @raise [ArgumentError] if a part is missing or the month name is not
#   one of Date::MONTHNAMES
def self.get_age(dob)
  month_name, day, year = dob.split(' ')
  b_month = Date::MONTHNAMES.index(month_name)
  # Date::MONTHNAMES[0] is nil, so a missing month would "match" index 0;
  # the day/year presence checks reject that case as well.
  unless b_month && day && year
    raise ArgumentError, "unrecognised date of birth: #{dob.inspect}"
  end

  b_year = year.to_i
  # String#to_i stops at the first non-digit, so "22," and "22" both give 22.
  b_day = day.to_i

  # Read the clock once so every comparison sees the same date.
  today = Date.today
  had_birthday = today.month > b_month ||
                 (today.month == b_month && today.day >= b_day)

  today.year - b_year - (had_birthday ? 0 : 1)
end

.get_key_stats(player_page) ⇒ Object



77
78
79
80
81
82
83
84
85
86
87
# File 'lib/nba_draft_2017/scraper.rb', line 77

# Looks for the "Key Statistics:" paragraph on a player profile page and
# returns the text that follows its first colon.
#
# Paragraph positions 10 through 14 under `.field-items` are probed in
# order; the first one whose <strong> text is "key statistics:" (compared
# case-insensitively, surrounding whitespace ignored) and that has text
# after the colon wins.
#
# @param player_page [#css] parsed page; only `css(selector).text` is used
# @return [String, nil] stripped statistics text, or nil when no probed
#   paragraph qualifies
def self.get_key_stats(player_page)
  (10..14).each do |idx|
    paragraph = ".field-items p[#{idx}]"
    heading = player_page.css("#{paragraph} strong").text.downcase.strip
    next unless heading == 'key statistics:'

    # A paragraph holding only the heading has nothing after the colon;
    # keep looking instead of calling strip on nil.
    value = player_page.css(paragraph).text.split(':')[1]
    return value.strip if value
  end

  nil
end

.scrape_draft ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/nba_draft_2017/scraper.rb', line 4

# Scrapes the CBS Sports 2017 draft results article and instantiates one
# NbaDraft2017::Player per pick (60 picks: entries 0-29 are tagged round
# '1', entries 30-59 round '2').
#
# Each player hash carries :pick, :nba_team, :former_team, :position,
# :round, :first_name, :last_name, :name and :profile_url, all as Strings.
#
# NOTE(review): bare `open` with a URL relies on open-uri's Kernel#open
# patch, which Ruby 3.0 removed in favour of URI.open; `:allow_redirections`
# is not a stdlib open-uri option (presumably the open_uri_redirections
# gem). Confirm the supported Ruby versions before touching this call.
#
# @return [nil]
def self.scrape_draft
  doc = Nokogiri::HTML(open('http://www.cbssports.com/nba/news/2017-nba-draft-picks-complete-results-full-list-of-players-selected-highlights-grades/', :allow_redirections => :safe))
  draft = doc.xpath('//*[@id="article-main-body"]/div/ol')
  # The node list does not change between picks, so query it once rather
  # than on every iteration.
  entries = draft.css('li p')

  60.times do |i|
    player_info = entries[i]
    player = {}

    player[:pick] = (i + 1).to_s
    player[:nba_team] = player_info.css('strong').text.gsub("\u00A0", '').gsub(':', '').strip
    player[:former_team] = player_info.text.split('-')[-1].gsub("\u00A0", '').gsub(':', '').strip
    details = player_info.text.split(' ')
    # The position is the first whitespace-separated token containing '('.
    player[:position] = details.detect { |d| d.include?('(') }.strip

    if i < 30
      player[:round] = '1'

      # When the first token contains ':', the first name is the part after
      # that colon; otherwise the same parsing is applied one token later.
      offset = details[0].include?(':') ? 0 : 1
      first = details[offset].split(':')[1]
      last = details[offset + 1]
    else
      player[:round] = '2'

      first = details[1]
      last = details[2]
    end

    # Removing every non-word character also removes whitespace, so no
    # further strip/gsub is needed on the cleaned names.
    player[:first_name] = first.gsub(/\W/, '')
    player[:last_name] = last.gsub(/\W/, '')
    player[:name] = "#{player[:first_name]} #{player[:last_name]}"

    # NOTE(review): hard-coded slug for first name 'Andzejs'; presumably the
    # profile site spells it differently — confirm.
    player[:profile_url] =
      if player[:first_name] == 'Andzejs'
        'anzejs_pasecniks'
      else
        "#{player[:first_name].downcase}_#{player[:last_name].downcase}"
      end

    NbaDraft2017::Player.new(player)
  end

  # The original index loop evaluated to nil; keep that return value.
  nil
end

.scrape_player(profile_url) ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/nba_draft_2017/scraper.rb', line 51

# Scrapes a single player profile page into a hash of attributes.
#
# Each field is taken from the text after the first ':' of the matching
# element (`.stats`, `.status`, `.birthday`) and is simply left out of the
# result when that text is missing, so a sparse profile no longer raises.
#
# NOTE(review): bare `open` with a URL relies on open-uri's Kernel#open
# patch, which Ruby 3.0 removed in favour of URI.open — confirm the
# supported Ruby versions before changing this call.
#
# @param profile_url [String] address of the profile page to fetch
# @return [Hash] may contain :height, :weight, :former_status, :age, plus
#   whatever keys split_key_stats adds when key statistics are found
def self.scrape_player(profile_url)
  player_page = Nokogiri::HTML(open(profile_url, :allow_redirections => :safe))
  player = {}

  # The stats text after the colon is split on '/' into height and weight.
  ht_weight = player_page.css('.stats').text.split(':')[1]
  if ht_weight
    height, weight = ht_weight.split('/')
    player[:height] = height.gsub('"', '').strip if height
    player[:weight] = weight.strip if weight
  end

  status = player_page.css('.status').text.split(':')[1]
  player[:former_status] = status.strip if status

  # Strip only after the nil check; stripping first made the guard dead
  # code and raised on profiles without a birthday.
  dob = player_page.css('.birthday').text.split(':')[1]
  dob = dob.strip if dob
  player[:age] = get_age(dob) if dob && !dob.empty?

  stats = get_key_stats(player_page)
  split_key_stats(stats, player) if stats

  player
end

.split_key_stats(stats, player) ⇒ Object



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/nba_draft_2017/scraper.rb', line 89

# Splits a comma-separated key-statistics string into individual stats,
# stores each recognised numeric value on `player`, and stores the list of
# display strings under player[:key_stats].
#
# A stat is recognised by the first marker (case-insensitive substring) it
# contains, checked in this order: ppg, rpg, apg, tpg, spg, bpg, mpg, fg,
# 3pt, ft. Its first whitespace-separated token is converted with to_f and
# written to the matching key (:ppg ... :mpg, :fg, :three, :ft). The display
# string is "<value> <second token>", or just "<value>" when the stat has
# no second token. Unrecognised stats yield nil in the list.
#
# @param stats [String] e.g. "15.2 ppg, 45.5 FG%"
# @param player [Hash] mutated in place
# @return [Array<String, nil>] the array stored in player[:key_stats]
def self.split_key_stats(stats, player)
  # Order matters: the first marker found in a stat decides its key.
  stat_keys = [
    ['ppg', :ppg], ['rpg', :rpg], ['apg', :apg], ['tpg', :tpg],
    ['spg', :spg], ['bpg', :bpg], ['mpg', :mpg],
    ['fg', :fg], ['3pt', :three], ['ft', :ft]
  ]

  player[:key_stats] = stats.strip.split(',').map do |stat|
    lowered = stat.downcase
    match = stat_keys.find { |marker, _key| lowered.include?(marker) }
    next unless match

    key = match[1]
    value, label = stat.split(' ')
    player[key] = value.to_f
    # Build the display text from the value just stored, so the percentage
    # stats show their number like every other stat.
    [player[key], label].compact.join(' ')
  end
end