Class: Linkedin::Profile

Inherits:
Object
  • Object
show all
Defined in:
lib/linkedin-scraper/profile.rb

Constant Summary collapse

# Browser identity names for the scraper. Where they are consumed is not
# visible in this section (presumably the HTTP client — confirm against
# http_client). Frozen so the shared list cannot be mutated at runtime.
USER_AGENTS =
['Windows IE 6', 'Windows IE 7', 'Windows Mozilla', 'Mac Safari', 'Mac FireFox', 'Mac Mozilla', 'Linux Mozilla', 'Linux Firefox', 'Linux Konqueror'].freeze
# Names of the profile readers that #to_json serialises, in output order.
# Frozen for the same reason as USER_AGENTS.
ATTRIBUTES =
%w(name first_name last_name title location country industry summary picture linkedin_url education groups websites languages skills certifications organizations past_companies current_companies recommended_visitors).freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url) ⇒ Profile



19
20
21
22
# File 'lib/linkedin-scraper/profile.rb', line 19

# Remembers the profile address and fetches it immediately.
#
# @param url the profile address; stored as-is in @linkedin_url and passed
#   to http_client.get, whose result is kept in @page for the readers below
def initialize(url)
  @linkedin_url = url
  # Any error raised by the fetch propagates out of the constructor.
  @page         = http_client.get(url)
end

Instance Attribute Details

#linkedin_url ⇒ Object (readonly)

Returns the value of attribute linkedin_url.



9
10
11
# File 'lib/linkedin-scraper/profile.rb', line 9

# Reader for the URL this profile was constructed with.
#
# @return the value stored in @linkedin_url by #initialize
def linkedin_url
  @linkedin_url
end

#page ⇒ Object (readonly)

Returns the value of attribute page.



9
10
11
# File 'lib/linkedin-scraper/profile.rb', line 9

# Reader for the fetched profile page that every scraping method queries.
#
# @return the value stored in @page by #initialize
def page
  @page
end

Class Method Details

.get_profile(url) ⇒ Object



11
12
13
14
15
16
17
# File 'lib/linkedin-scraper/profile.rb', line 11

# Best-effort constructor: builds a Linkedin::Profile for +url+.
#
# Any StandardError raised while building the profile is printed with
# +puts+ and swallowed, in which case the method returns nil.
#
# @param url the profile address handed to Linkedin::Profile.new
# @return [Linkedin::Profile, nil]
def self.get_profile(url)
  Linkedin::Profile.new(url)
rescue => e
  puts e
end

Instance Method Details

#certifications ⇒ Object



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/linkedin-scraper/profile.rb', line 147

# Certifications listed on the profile, scraped on first use and cached.
#
# Each entry is a Hash with :name, :authority, :license and :start_date.
# A field is nil when its node is missing from the list item (extraction is
# best-effort: any error while reading a field yields nil for that field).
#
# @return [Array<Hash>] empty when the page has no certification items
def certifications
  # The guard must test the same ivar that is assigned below; the previous
  # misspelling (@certtifications) made every call re-scrape the page.
  unless @certifications
    @certifications = []
    if @page.at('ul.certifications/li.certification')
      @certifications = @page.search('ul.certifications/li.certification').map do |item|
        name       = item.at('h3').text.gsub(/\s+|\n/, ' ').strip                         rescue nil
        authority  = item.at('.specifics/.org').text.gsub(/\s+|\n/, ' ').strip            rescue nil
        license    = item.at('.specifics/.licence-number').text.gsub(/\s+|\n/, ' ').strip rescue nil
        start_date = item.at('.specifics/.dtstart').text.gsub(/\s+|\n/, ' ').strip        rescue nil

        {:name => name, :authority => authority, :license => license, :start_date => start_date}
      end
    end
  end
  @certifications
end

#country ⇒ Object



44
45
46
# File 'lib/linkedin-scraper/profile.rb', line 44

# Last comma-separated part of the '.locality' text, stripped.
# Cached after the first non-nil result.
#
# @return [String, nil] nil when the page has no '.locality' node
def country
  @country ||= begin
    locality = @page.at('.locality')
    locality.text.split(',').last.strip if locality
  end
end

#current_companies ⇒ Object



68
69
70
# File 'lib/linkedin-scraper/profile.rb', line 68

# Result of get_companies('current'), cached once it is truthy.
#
# @return whatever get_companies returns for the 'current' section
def current_companies
  return @current_companies if @current_companies

  @current_companies = get_companies('current')
end

#education ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/linkedin-scraper/profile.rb', line 72

# Education entries found under '.position.education.vevent.vcard'.
#
# Each entry is a Hash with :name (h3), :description (h4) and :period
# ('.period'); text has whitespace runs collapsed to one space and is
# stripped. A field is nil when its node is absent. The list is cached.
#
# @return [Array<Hash>] empty when the page has no education entries
def education
  return @education if @education

  @education = []
  entries = @page.search('.position.education.vevent.vcard')
  return @education unless entries.first

  # Normalises a node's text, or yields nil for a missing node.
  tidy = lambda { |node| node.text.gsub(/\s+|\n/, ' ').strip if node }

  @education = entries.map do |entry|
    {
      :name        => tidy.call(entry.at('h3')),
      :description => tidy.call(entry.at('h4')),
      :period      => tidy.call(entry.at('.period'))
    }
  end
end

#first_name ⇒ Object



28
29
30
# File 'lib/linkedin-scraper/profile.rb', line 28

# Stripped text of the '.given-name' node, cached once found.
#
# @return [String, nil] nil when the page has no '.given-name' node
def first_name
  return @first_name if @first_name

  given = @page.at('.given-name')
  @first_name = given ? given.text.strip : nil
end

#groups ⇒ Object



102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/linkedin-scraper/profile.rb', line 102

# Groups found under '.group-data', scraped on first use and cached.
#
# Each entry is a Hash with :name (the item's text, whitespace collapsed)
# and :link (the anchor's href prefixed with the LinkedIn host). :link is
# nil when the item has no anchor or the anchor has no href; previously
# that case raised NoMethodError and left the cache stuck on an empty list.
#
# @return [Array<Hash>] empty when the page has no group items
def groups
  unless @groups
    # Mapping an empty result already yields [], so no separate emptiness
    # probe (and no second search of the page) is needed.
    @groups = @page.search('.group-data').map do |item|
      name   = item.text.gsub(/\s+|\n/, ' ').strip
      anchor = item.at('a')
      href   = anchor && anchor['href']
      link   = href ? "http://www.linkedin.com#{href}" : nil
      {:name => name, :link => link}
    end
  end
  @groups
end

#industry ⇒ Object



48
49
50
# File 'lib/linkedin-scraper/profile.rb', line 48

# Text of the '.industry' node with whitespace runs collapsed to a single
# space and the ends stripped. Cached once found.
#
# @return [String, nil] nil when the page has no '.industry' node
def industry
  @industry ||= begin
    node = @page.at('.industry')
    node.text.gsub(/\s+/, ' ').strip if node
  end
end

#languages ⇒ Object



133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/linkedin-scraper/profile.rb', line 133

# Languages found under 'ul.languages/li.language', cached after first use.
#
# Each entry is a Hash with :language (raw h3 text, not stripped) and
# :proficiency ('span.proficiency' text, whitespace collapsed). Either
# field is nil when reading it raises, e.g. because the node is missing.
#
# @return [Array<Hash>] empty when the page has no language items
def languages
  return @languages if @languages

  @languages = []
  return @languages unless @page.at('ul.languages/li.language')

  @languages = @page.search('ul.languages/li.language').map do |entry|
    spoken = begin
      entry.at('h3').text
    rescue StandardError
      nil
    end
    level = begin
      entry.at('span.proficiency').text.gsub(/\s+|\n/, ' ').strip
    rescue StandardError
      nil
    end
    {:language => spoken, :proficiency => level}
  end
end

#last_name ⇒ Object



32
33
34
# File 'lib/linkedin-scraper/profile.rb', line 32

# Stripped text of the '.family-name' node, cached once found.
#
# @return [String, nil] nil when the page has no '.family-name' node
def last_name
  return @last_name if @last_name

  family = @page.at('.family-name')
  @last_name = family ? family.text.strip : nil
end

#location ⇒ Object



40
41
42
# File 'lib/linkedin-scraper/profile.rb', line 40

# First comma-separated part of the '.locality' text, stripped.
# Cached after the first non-nil result.
#
# @return [String, nil] nil when the page has no '.locality' node
def location
  @location ||= begin
    locality = @page.at('.locality')
    locality.text.split(',').first.strip if locality
  end
end

#name ⇒ Object



24
25
26
# File 'lib/linkedin-scraper/profile.rb', line 24

# Full name: first_name and last_name joined by a single space.
# A missing part contributes an empty string, so the space is always kept.
#
# @return [String]
def name
  first_name.to_s + ' ' + last_name.to_s
end

#organizations ⇒ Object



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/linkedin-scraper/profile.rb', line 116

# Organizations found under 'ul.organizations/li.organization', cached.
#
# Each entry is a Hash with :name (h3 text, whitespace collapsed) and
# :start_date / :end_date, obtained by splitting the 'ul.specifics li'
# text on ' to ' and running each half through Date.parse. A date that
# is absent or cannot be parsed becomes nil.
#
# @return [Array<Hash>] empty when the page has no organization items
def organizations
  return @organizations if @organizations

  @organizations = []
  selector = 'ul.organizations/li.organization'
  return @organizations unless @page.search(selector).first

  # Date.parse raises on nil or unparseable input; treat both as "no date".
  to_date = lambda do |raw|
    begin
      Date.parse(raw)
    rescue StandardError
      nil
    end
  end

  @organizations = @page.search(selector).map do |org|
    label = begin
      org.search('h3').text.gsub(/\s+|\n/, ' ').strip
    rescue StandardError
      nil
    end
    from, till = org.search('ul.specifics li').text.gsub(/\s+|\n/, ' ').strip.split(' to ')
    {:name => label, :start_date => to_date.call(from), :end_date => to_date.call(till)}
  end
end

#past_companies ⇒ Object



64
65
66
# File 'lib/linkedin-scraper/profile.rb', line 64

# Result of get_companies('past'), cached once it is truthy.
#
# @return whatever get_companies returns for the 'past' section
def past_companies
  return @past_companies if @past_companies

  @past_companies = get_companies('past')
end

#picture ⇒ Object



56
57
58
# File 'lib/linkedin-scraper/profile.rb', line 56

# Stripped value of the 'src' attribute of '#profile-picture/img.photo'.
# Cached once found.
#
# @return [String, nil] nil when the page has no such image node
def picture
  return @picture if @picture

  photo = @page.at('#profile-picture/img.photo')
  @picture = photo ? photo.attributes['src'].value.strip : nil
end


165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/linkedin-scraper/profile.rb', line 165

# Entries of the '.browsemap/.content/ul/li' list, scraped on first use
# and cached.
#
# Each entry is a Hash with :link (href of the first anchor), :name (text
# of 'strong/a'), and :title / :company taken from the '.headline' text:
# every '...' is replaced by a space, the text is split on ' at ', and the
# first two pieces are used. A field is nil when its node is missing;
# previously a missing node raised NoMethodError and left the cache stuck
# on an empty list.
#
# @return [Array<Hash>] empty when the page has no such list items
def recommended_visitors
  unless @recommended_visitors
    # Mapping an empty result already yields [], so no separate emptiness
    # probe is needed.
    @recommended_visitors = @page.search('.browsemap/.content/ul/li').map do |visitor|
      anchor   = visitor.at('a')
      name     = visitor.at('strong/a')
      headline = visitor.at('.headline')
      # Parse the headline once; both locals stay nil without a headline.
      title, company = headline.text.gsub('...', ' ').split(' at ') if headline

      {
        :link    => anchor && anchor['href'],
        :name    => name && name.text,
        :title   => title,
        :company => company
      }
    end
  end
  @recommended_visitors
end

#skills ⇒ Object



60
61
62
# File 'lib/linkedin-scraper/profile.rb', line 60

# Stripped text of every '.competency.show-bean' node, cached once truthy.
#
# Best-effort: any StandardError raised while scraping yields nil instead
# of propagating (and nil is not cached, so the next call tries again).
#
# @return [Array, nil]
def skills
  return @skills if @skills

  @skills = begin
    @page.search('.competency.show-bean').map do |node|
      node.text.strip if node.text
    end
  rescue StandardError
    nil
  end
end

#summary ⇒ Object



52
53
54
# File 'lib/linkedin-scraper/profile.rb', line 52

# Text of the '.description.summary' node with whitespace runs collapsed
# to a single space and the ends stripped. Cached once found.
#
# @return [String, nil] nil when the page has no such node
def summary
  return @summary if @summary

  blurb = @page.at('.description.summary')
  @summary = blurb ? blurb.text.gsub(/\s+/, ' ').strip : nil
end

#title ⇒ Object



36
37
38
# File 'lib/linkedin-scraper/profile.rb', line 36

# Text of the '.headline-title' node with whitespace runs collapsed to a
# single space and the ends stripped. Cached once found.
#
# @return [String, nil] nil when the page has no '.headline-title' node
def title
  @title ||= begin
    headline = @page.at('.headline-title')
    headline.text.gsub(/\s+/, ' ').strip if headline
  end
end

#to_json ⇒ Object



182
183
184
185
# File 'lib/linkedin-scraper/profile.rb', line 182

# Serialises the profile as a JSON object with one key per ATTRIBUTES
# entry, each value obtained by calling the reader of the same name.
#
# Accepts and forwards the optional generator arguments: the JSON library
# calls +to_json(state)+ on objects nested inside arrays and hashes, which
# raised ArgumentError while this method took no parameters.
#
# @param args optional generator state/options, passed on to Hash#to_json
# @return [String] JSON text
def to_json(*args)
  # Kept local so the method works even if the file never loaded JSON.
  require 'json'
  ATTRIBUTES.each_with_object({}) { |attr, hash| hash[attr.to_sym] = send(attr) }.to_json(*args)
end

#websites ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/linkedin-scraper/profile.rb', line 88

def websites
  unless @websites
    @websites = []
    if @page.search('.website').first
      @websites = @page.search('.website').map do |site|
        url = site.at('a')['href']
        url = "http://www.linkedin.com#{url}"
        CGI.parse(URI.parse(url).query)['url']
      end.flatten!
    end
  end
  @websites
end