Class: Linkedin::Profile

Inherits:
Object
  • Object
show all
Defined in:
lib/linkedin-scraper/profile.rb

Constant Summary collapse

# Names of the profile attributes. #to_json sends each name to the instance to
# build the serialized hash. Frozen so the shared list cannot be mutated at
# runtime.
ATTRIBUTES =
%w(
name
first_name
last_name
title
location
number_of_connections
country
industry
summary
picture
projects
linkedin_url
education
groups
websites
languages
skills
certifications
organizations
past_companies
current_companies
recommended_visitors ).freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, options = {}) ⇒ Profile

Returns a new instance of Profile.



30
31
32
33
34
# File 'lib/linkedin-scraper/profile.rb', line 30

# Stores the profile URL and options, then fetches the page at +url+.
#
# @param url [String] address of the profile to load; not validated here
# @param options [Hash] kept in @options; not read in this method —
#   NOTE(review): presumably consumed by http_client (defined elsewhere), confirm
def initialize(url, options = {})
  @linkedin_url = url
  @options = options
  # http_client is defined outside this view; the result of its #get call is the
  # document the attribute readers of this class query through @page.
  @page = http_client.get(url)
end

Instance Attribute Details

#linkedin_url ⇒ Object (readonly)

Returns the value of attribute linkedin_url.



28
29
30
# File 'lib/linkedin-scraper/profile.rb', line 28

# Reader for @linkedin_url, the URL stored by #initialize.
def linkedin_url
  @linkedin_url
end

#page ⇒ Object (readonly)

Returns the value of attribute page.



28
29
30
# File 'lib/linkedin-scraper/profile.rb', line 28

# Reader for @page, the object returned by http_client.get in #initialize.
def page
  @page
end

Instance Method Details

#certifications ⇒ Object



152
153
154
155
156
157
158
159
160
161
# File 'lib/linkedin-scraper/profile.rb', line 152

# Lists the certifications found on the page; the result is memoized.
#
# NOTE(review): the selector 'background-certifications' carries no '#' or '.'
# prefix, unlike '#background-organizations' used by #organizations, so it reads
# as a tag-name selector — confirm this is intended.
#
# @return [Array<Hash>] one hash per match with :name, :authority, :license and
#   :start_date; a field is nil whenever its lookup raises
def certifications
  return @certifications if @certifications

  # Collapse whitespace runs into single spaces and trim both ends.
  squish = lambda { |node| node.text.gsub(/\s+|\n/, ' ').strip }

  @certifications = @page.search('background-certifications').map do |entry|
    {
      name: (squish.call(entry.at('h4')) rescue nil),
      authority: (squish.call(entry.at('h5')) rescue nil),
      license: (squish.call(entry.at('.specifics/.licence-number')) rescue nil),
      start_date: (squish.call(entry.at('.certification-date')) rescue nil)
    }
  end
end

#country ⇒ Object



56
57
58
# File 'lib/linkedin-scraper/profile.rb', line 56

# Returns the last comma-separated segment of the '.locality' text, stripped.
# The value is memoized in @country once it is non-nil.
#
# @return [String, nil] nil when the page has no '.locality' node or its text
#   contains no segment (e.g. it is empty)
def country
  @country ||= begin
    locality = @page.at('.locality')
    segment = locality.text.split(',').last if locality
    # An empty text splits into no segments, so guard before stripping.
    segment.strip if segment
  end
end

#current_companies ⇒ Object



91
92
93
# File 'lib/linkedin-scraper/profile.rb', line 91

# Companies from get_companies whose :end_date equals 'Present'; memoized.
# get_companies is defined outside this view.
def current_companies
  return @current_companies if @current_companies

  @current_companies = get_companies.select { |company| company[:end_date] == 'Present' }
end

#education ⇒ Object



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/linkedin-scraper/profile.rb', line 95

# Lists the schools found under '.schools .school'; the result is memoized.
#
# Each entry is built from the school's 'h4' (name), its last 'h5'
# (description, with nested '.degree' and '.major') and '.date-range'.
# Missing nodes yield nil fields instead of raising, and the date range is split
# on its dash separator rather than into single characters.
#
# @return [Array<Hash>] hashes with :name, :description, :degree, :major,
#   :period, :start_date and :end_date
def education
  @education ||= @page.search('.schools .school').map do |item|
    # Collapses whitespace runs and trims; nil when the node is absent.
    clean = lambda { |node| node.text.gsub(/\s+|\n/, ' ').strip if node }

    details = item.search('h5').last
    degree_node = details.at('.degree') if details
    major_node = details.at('.major') if details

    degree = clean.call(degree_node)
    degree = degree.gsub(/,$/, '') if degree # drop the trailing comma before the major

    period = clean.call(item.at('.date-range'))
    # Split "start – end" on an en/em dash, or on a hyphen set off by spaces.
    start_date, end_date = period.split(/\s*[\u2013\u2014]\s*|\s+-\s+/, 2) if period

    {
        name: clean.call(item.at('h4')),
        description: clean.call(details),
        degree: degree,
        major: clean.call(major_node),
        period: period,
        start_date: start_date,
        end_date: end_date
    }
  end
end

#first_name ⇒ Object



40
41
42
# File 'lib/linkedin-scraper/profile.rb', line 40

def first_name
  @first_name ||= (@page.at('.fn').text.split(' ', 2)[0].strip if @page.at('.fn'))
end

#groups ⇒ Object



125
126
127
128
129
130
131
132
# File 'lib/linkedin-scraper/profile.rb', line 125

# Lists the groups found under '#groups .group .item-title'; memoized.
#
# @return [Array<Hash>] hashes with :name (whitespace-collapsed title text) and
#   :link (the 'href' of the title's first anchor, or nil when there is no anchor)
def groups
  @groups ||= @page.search('#groups .group .item-title').map do |item|
    anchor = item.at('a')

    {
      name: item.text.gsub(/\s+|\n/, ' ').strip,
      # A title without an anchor must not abort the whole listing.
      link: (anchor['href'] if anchor)
    }
  end
end

#industry ⇒ Object



66
67
68
69
70
# File 'lib/linkedin-scraper/profile.rb', line 66

# Text of the last '#demographics .descriptor' node with whitespace collapsed.
# Returns nil when the page has no such node; otherwise memoized in @industry.
def industry
  return unless @page.at('#demographics .descriptor')

  @industry ||= @page.search('#demographics .descriptor').last.text.gsub(/\s+/, ' ').strip
end

#languages ⇒ Object



144
145
146
147
148
149
150
# File 'lib/linkedin-scraper/profile.rb', line 144

# Lists the languages found under '.background-languages #languages ol li';
# the result is memoized.
#
# @return [Array<Hash>] hashes with :language (raw 'h4' text) and :proficiency
#   (whitespace-collapsed); a field is nil whenever its lookup raises
def languages
  return @languages if @languages

  entries = @page.search('.background-languages #languages ol li')
  @languages = entries.map do |entry|
    {
      language: (entry.at('h4').text rescue nil),
      proficiency: (entry.at('div.languages-proficiency').text.gsub(/\s+|\n/, ' ').strip rescue nil)
    }
  end
end

#last_name ⇒ Object



44
45
46
# File 'lib/linkedin-scraper/profile.rb', line 44

# Returns everything after the first whitespace-separated word of the '.fn'
# text. The value is memoized in @last_name once it is non-nil.
#
# @return [String, nil] nil when the page has no '.fn' node or the name consists
#   of a single word
def last_name
  @last_name ||= begin
    full_name = @page.at('.fn')
    remainder = full_name.text.split(' ', 2)[1] if full_name
    # A single-word name has no second part, so guard before stripping.
    remainder.strip if remainder
  end
end

#location ⇒ Object



52
53
54
# File 'lib/linkedin-scraper/profile.rb', line 52

# Returns the first comma-separated segment of the '.locality' text, stripped.
# The value is memoized in @location once it is non-nil.
#
# @return [String, nil] nil when the page has no '.locality' node or its text
#   contains no segment (e.g. it is empty)
def location
  @location ||= begin
    locality = @page.at('.locality')
    segment = locality.text.split(',').first if locality
    # An empty text splits into no segments, so guard before stripping.
    segment.strip if segment
  end
end

#name ⇒ Object



36
37
38
# File 'lib/linkedin-scraper/profile.rb', line 36

# Full name built from #first_name and #last_name, joined by a single space.
# Parts that are nil are left out, so the result carries no stray leading or
# trailing space.
#
# @return [String] empty string when neither part is available
def name
  [first_name, last_name].compact.join(' ')
end

#number_of_connections ⇒ Object



60
61
62
63
64
# File 'lib/linkedin-scraper/profile.rb', line 60

# Returns the first run of digits (with an optional trailing '+') found in the
# '.member-connections' text, as a String. Memoized in @connections once non-nil.
#
# @return [String, nil] e.g. "500+"; nil when the node is missing or its text
#   contains no digits
def number_of_connections
  badge = @page.at('.member-connections')
  return unless badge

  # String#[] with a regexp yields nil on no match instead of raising.
  @connections ||= badge.text[/[0-9]+\+?/]
end

#organizations ⇒ Object



134
135
136
137
138
139
140
141
142
# File 'lib/linkedin-scraper/profile.rb', line 134

# Lists the organizations found under '#background-organizations .section-item';
# the result is memoized.
#
# The '.organizations-date' text is split on its dash separator (not into
# single characters) and each side is parsed with Date.parse.
#
# @return [Array<Hash>] hashes with :name (String or nil) and :start_date /
#   :end_date (Date, or nil when absent or not parseable as a date)
def organizations
  @organizations ||= @page.search('#background-organizations .section-item').map do |item|
    # Collapses whitespace runs and trims; nil when the node is absent.
    clean = lambda { |node| node.text.gsub(/\s+|\n/, ' ').strip if node }
    # Date.parse raises ArgumentError on text that is not a date.
    to_date = lambda do |text|
      begin
        Date.parse(text) if text
      rescue ArgumentError
        nil
      end
    end

    range = clean.call(item.at('.organizations-date'))
    # Split "start – end" on an en/em dash, or on a hyphen set off by spaces.
    start_text, end_text = range.split(/\s*[\u2013\u2014]\s*|\s+-\s+/, 2) if range

    { name: clean.call(item.at('.summary')), start_date: to_date.call(start_text), end_date: to_date.call(end_text) }
  end
end

#past_companies ⇒ Object



87
88
89
# File 'lib/linkedin-scraper/profile.rb', line 87

# Companies from get_companies whose :end_date is anything other than
# 'Present'; memoized. get_companies is defined outside this view.
def past_companies
  return @past_companies if @past_companies

  @past_companies = get_companies.select { |company| company[:end_date] != 'Present' }
end

#picture ⇒ Object



76
77
78
79
80
81
# File 'lib/linkedin-scraper/profile.rb', line 76

# Returns the picture URL from the '.profile-picture img' node, preferring its
# 'src' attribute and falling back to 'data-delayed-url'. Memoized in @picture
# once non-nil.
#
# @return [String, nil] nil when the image node is missing or carries neither
#   attribute
def picture
  image = @page.at('.profile-picture img')
  return unless image

  @picture ||= begin
    source = image.attributes.values_at('src', 'data-delayed-url').compact.first
    # Neither attribute present: report no picture instead of raising.
    source.value.strip if source
  end
end

#projects ⇒ Object



176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/linkedin-scraper/profile.rb', line 176

# Lists the projects found under '#projects .project'; the result is memoized.
#
# The '.date-range' text is split on its dash separator (not into single
# characters) before each side is handed to parse_date, and a missing
# '.item-title' yields a nil title instead of raising.
#
# @return [Array<Hash>] hashes with :title, :link, :start_date, :end_date,
#   :description and :associates; best-effort fields are nil when their lookup
#   raises
def projects
  @projects ||= @page.search('#projects .project').map do |node|
    range_node = node.at('.date-range')
    range = range_node.text.gsub(/\s+|\n/, ' ').strip if range_node
    # Split "start – end" on an en/em dash, or on a hyphen set off by spaces.
    start_date, end_date = range.split(/\s*[\u2013\u2014]\s*|\s+-\s+/, 2) if range
    title_node = node.at('.item-title')

    project = {}
    project[:title] = (title_node.text if title_node)
    # The target address is read from the 'url' query parameter of the title link.
    project[:link] = CGI.parse(URI.parse(node.at('.item-title a')['href']).query)['url'][0] rescue nil
    # parse_date is defined outside this view; kept best-effort as before.
    project[:start_date] = parse_date(start_date) rescue nil
    project[:end_date] = parse_date(end_date) rescue nil
    project[:description] = node.at('.description').children.to_s rescue nil
    project[:associates] = node.search('.contributors .contributor').map { |c| c.at('a').text } rescue nil
    project
  end
end


164
165
166
167
168
169
170
171
172
173
174
# File 'lib/linkedin-scraper/profile.rb', line 164

# Lists the profile cards found under
# '.insights .browse-map/ul/li.profile-card'; the result is memoized.
#
# @return [Array<Hash>] hashes with :link and :name, plus :title and :company
#   when the card has a '.headline' (its text split on ' at ')
def recommended_visitors
  return @recommended_visitors if @recommended_visitors

  cards = @page.search('.insights .browse-map/ul/li.profile-card')
  @recommended_visitors = cards.map do |card|
    entry = { link: card.at('a')['href'], name: card.at('h4/a').text }

    headline = card.at('.headline')
    if headline
      # Only the first two ' at '-separated pieces are kept.
      pieces = headline.text.gsub('...', ' ').split(' at ')
      entry[:title] = pieces[0]
      entry[:company] = pieces[1]
    end

    entry
  end
end

#skills ⇒ Object



83
84
85
# File 'lib/linkedin-scraper/profile.rb', line 83

# Stripped text of every '.pills .skill:not(.see-less)' node; memoized once
# non-nil. Any StandardError raised while collecting yields nil.
def skills
  @skills ||= begin
    @page.search('.pills .skill:not(.see-less)').map do |pill|
      label = pill.text
      label.strip if label
    end
  rescue StandardError
    nil
  end
end

#summary ⇒ Object



72
73
74
# File 'lib/linkedin-scraper/profile.rb', line 72

# Text of the '#summary .description' node with whitespace runs collapsed and
# the ends trimmed; nil when the node is missing. Memoized once non-nil.
def summary
  return @summary if @summary

  description = @page.at('#summary .description')
  @summary = description.text.gsub(/\s+/, ' ').strip if description
end

#title ⇒ Object



48
49
50
# File 'lib/linkedin-scraper/profile.rb', line 48

# Text of the '.title' node with whitespace runs collapsed and the ends
# trimmed; nil when the node is missing. Memoized once non-nil.
def title
  return @title if @title

  heading = @page.at('.title')
  @title = heading.text.gsub(/\s+/, ' ').strip if heading
end

#to_json ⇒ Object



191
192
193
194
# File 'lib/linkedin-scraper/profile.rb', line 191

# Serializes the profile to a JSON string: every name in ATTRIBUTES is sent to
# the instance and the results are collected into a hash keyed by symbol.
#
# @param args optional generator arguments, forwarded unchanged to Hash#to_json
#   so the method also works when a JSON generator calls it with a state object
# @return [String] the JSON document
def to_json(*args)
  require 'json' # loaded lazily, on first serialization

  attributes = ATTRIBUTES.each_with_object({}) do |attr, hash|
    hash[attr.to_sym] = send(attr.to_sym)
  end
  attributes.to_json(*args)
end

#websites ⇒ Object



118
119
120
121
122
123
# File 'lib/linkedin-scraper/profile.rb', line 118

def websites
  @websites ||= @page.search('.websites li').flat_map do |site|
    url = site.at('a')['href']
    CGI.parse(URI.parse(url).query)['url']
  end
end