Class: Linkedin::Profile

Inherits:
Object
  • Object
show all
Defined in:
lib/linkedin_scraper/profile.rb

Constant Summary collapse

# Browser alias names.
# NOTE(review): presumably picked from by the HTTP client, which is not
# visible in this excerpt -- confirm against the caller.
# Frozen so the shared list cannot be mutated by accident at runtime.
USER_AGENTS = [
  "Windows IE 6",
  "Windows IE 7",
  "Windows Mozilla",
  "Mac Safari",
  "Mac Firefox",
  "Mac Mozilla",
  "Linux Mozilla",
  "Linux Firefox",
  "Linux Konqueror"
].freeze
# Names of the profile readers; #to_json sends each of these to the instance
# and uses the name (as a symbol) as the key. Frozen so the list cannot be
# mutated by accident at runtime.
ATTRIBUTES = %w(
  name
  first_name
  last_name
  title
  location
  number_of_connections
  country
  industry
  summary
  picture
  projects
  linkedin_url
  education
  groups
  websites
  languages
  skills
  certifications
  organizations
  past_companies
  current_companies
  recommended_visitors
).freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, options = {}) ⇒ Profile

Returns a new instance of Profile.



38
39
40
41
42
# File 'lib/linkedin_scraper/profile.rb', line 38

# Stores the profile address and options, then fetches the page immediately.
#
# @param url [Object] profile address; kept in @linkedin_url and handed to
#   http_client.get
# @param options [Hash] kept in @options (not read inside this method)
def initialize(url, options = {})
  @options = options
  @linkedin_url = url
  # http_client is defined outside this excerpt; whatever its #get returns
  # becomes the page object that every reader below queries.
  @page = http_client.get(url)
end

Instance Attribute Details

#linkedin_url ⇒ Object (readonly)

Returns the value of attribute linkedin_url.



30
31
32
# File 'lib/linkedin_scraper/profile.rb', line 30

# Reader for @linkedin_url, which the constructor sets from its +url+ argument.
def linkedin_url
  @linkedin_url
end

#page ⇒ Object (readonly)

Returns the value of attribute page.



30
31
32
# File 'lib/linkedin_scraper/profile.rb', line 30

# Reader for @page, which the constructor sets to the result of
# http_client.get(url).
def page
  @page
end

Class Method Details

.get_profile(url, options = {}) ⇒ Object



32
33
34
35
36
# File 'lib/linkedin_scraper/profile.rb', line 32

# Convenience constructor that never raises a StandardError.
#
# @param url [Object] forwarded to Linkedin::Profile.new
# @param options [Hash] forwarded to Linkedin::Profile.new
# @return [Linkedin::Profile, nil] the new profile, or nil when construction
#   raised; in that case the error is printed to standard output
def self.get_profile(url, options = {})
  profile = Linkedin::Profile.new(url, options)
  profile
rescue => failure
  puts failure
  nil
end

Instance Method Details

#certifications ⇒ Object



141
142
143
144
145
146
147
148
149
150
# File 'lib/linkedin_scraper/profile.rb', line 141

# Certifications found on the page, memoized in @certifications.
#
# Each entry is a hash with :name, :authority, :license and :start_date.
# A field whose lookup raises for any reason (for example, the element is
# missing) is nil.
#
# NOTE(review): "background-certifications" has no leading "#" or ".", unlike
# "#background-organizations" used by #organizations, and
# ".specifics/.licence-number" is an unusual selector -- confirm both against
# the real markup.
def certifications
  @certifications ||= begin
    squish = lambda { |node| node.text.gsub(/\s+|\n/, " ").strip }

    @page.search("background-certifications").map do |cert|
      {
        :name       => (squish.call(cert.at("h4")) rescue nil),
        :authority  => (squish.call(cert.at("h5")) rescue nil),
        :license    => (squish.call(cert.at(".specifics/.licence-number")) rescue nil),
        :start_date => (squish.call(cert.at(".certification-date")) rescue nil)
      }
    end
  end
end

#country ⇒ Object



68
69
70
# File 'lib/linkedin_scraper/profile.rb', line 68

# Last comma-separated segment of the ".locality" text, stripped.
# Memoized in @country; nil when the page has no ".locality" element.
def country
  @country ||= begin
    locality = @page.at(".locality")
    locality.text.split(",").last.strip if locality
  end
end

#current_companies ⇒ Object



92
93
94
# File 'lib/linkedin_scraper/profile.rb', line 92

# Entries from get_companies whose :end_date is exactly "Present".
# Memoized in @current_companies.
def current_companies
  return @current_companies if @current_companies

  @current_companies = get_companies.select { |company| company[:end_date] == "Present" }
end

#education ⇒ Object



96
97
98
99
100
101
102
103
104
105
106
# File 'lib/linkedin_scraper/profile.rb', line 96

# Schools listed under ".schools .school", memoized in @education.
#
# Each entry is a hash with :name, :description, :degree, :major, :period,
# :start_date and :end_date. Any field whose element is absent is nil.
# :description, :degree and :major are read from the last "h5" of the school;
# a school without any "h5" yields nil for all three instead of raising.
def education
  @education ||= @page.search(".schools .school").map do |item|
    squish = lambda { |node| node.text.gsub(/\s+|\n/, " ").strip }

    heading    = item.at("h4")
    details    = item.search("h5").last
    date_range = item.at(".date-range")
    # Guard on `details` first: it is nil when the school has no h5 at all.
    degree_node = details && details.at(".degree")
    major_node  = details && details.at(".major")

    period = squish.call(date_range) if date_range
    # The separator is an en dash surrounded by spaces.
    start_date, end_date = period.split(" – ") if period

    {
      :name        => (squish.call(heading) if heading),
      :description => (squish.call(details) if details),
      # Drop the trailing comma that separates degree from major.
      :degree      => (squish.call(degree_node).gsub(/,$/, "") if degree_node),
      :major       => (squish.call(major_node) if major_node),
      :period      => period,
      :start_date  => start_date,
      :end_date    => end_date
    }
  end
end

#first_name ⇒ Object



48
49
50
# File 'lib/linkedin_scraper/profile.rb', line 48

# First whitespace-separated word of the ".fn" text.
# Memoized in @first_name; nil when the page has no ".fn" element.
def first_name
  @first_name ||= begin
    full_name = @page.at(".fn")
    full_name.text.split(" ", 2)[0].strip if full_name
  end
end

#groups ⇒ Object



115
116
117
118
119
120
121
# File 'lib/linkedin_scraper/profile.rb', line 115

# Groups listed under "#groups .group .item-title", memoized in @groups.
#
# Each entry is a hash with :name (whitespace-collapsed title text) and :link
# (the href of the first anchor inside the title). :link is nil when the title
# contains no anchor, rather than raising.
def groups
  @groups ||= @page.search("#groups .group .item-title").map do |item|
    anchor = item.at("a")
    {
      :name => item.text.gsub(/\s+|\n/, " ").strip,
      :link => (anchor["href"] if anchor)
    }
  end
end

#industry ⇒ Object



72
73
74
# File 'lib/linkedin_scraper/profile.rb', line 72

# Text of the last "#demographics .descriptor" element, with runs of
# whitespace collapsed. Memoized in @industry; nil when no such element exists.
def industry
  @industry ||= begin
    if @page.at("#demographics .descriptor")
      descriptors = @page.search("#demographics .descriptor")
      descriptors[-1].text.gsub(/\s+/, " ").strip
    end
  end
end

#languages ⇒ Object



133
134
135
136
137
138
139
# File 'lib/linkedin_scraper/profile.rb', line 133

# Languages listed under ".background-languages #languages ol li", memoized
# in @languages.
#
# Each entry is a hash with :language (raw "h4" text) and :proficiency
# (whitespace-collapsed text of "div.languages-proficiency"). A field whose
# lookup raises for any reason is nil.
def languages
  @languages ||= @page.search(".background-languages #languages ol li").map do |entry|
    {
      :language    => (entry.at("h4").text rescue nil),
      :proficiency => (entry.at("div.languages-proficiency").text.gsub(/\s+|\n/, " ").strip rescue nil)
    }
  end
end

#last_name ⇒ Object



52
53
54
# File 'lib/linkedin_scraper/profile.rb', line 52

# Everything after the first whitespace-separated word of the ".fn" text,
# stripped. Memoized in @last_name.
#
# Returns nil when the page has no ".fn" element, and also when the name is a
# single word (there is no second part to strip) instead of raising.
def last_name
  @last_name ||= begin
    full_name = @page.at(".fn")
    remainder = full_name.text.split(" ", 2)[1] if full_name
    remainder.strip if remainder
  end
end

#location ⇒ Object



60
61
62
# File 'lib/linkedin_scraper/profile.rb', line 60

# First comma-separated segment of the ".locality" text, stripped.
# Memoized in @location; nil when the page has no ".locality" element.
def location
  @location ||= begin
    locality = @page.at(".locality")
    locality.text.split(",").first.strip if locality
  end
end

#name ⇒ Object



44
45
46
# File 'lib/linkedin_scraper/profile.rb', line 44

# first_name and last_name joined by a single space. A nil part contributes
# an empty string, so the separating space is always present.
def name
  [first_name, last_name].join(" ")
end

#number_of_connections ⇒ Object



64
65
66
# File 'lib/linkedin_scraper/profile.rb', line 64

# First run of digits in the ".member-connections" text, including one
# trailing "+" if present (e.g. "500+"). Returned as a String and memoized in
# @connections.
#
# Returns nil when the element is missing, and also when its text contains no
# digits (String#[] with a regexp yields nil) instead of raising.
def number_of_connections
  @connections ||= begin
    badge = @page.at(".member-connections")
    badge.text[/[0-9]+[\+]{0,1}/] if badge
  end
end

#organizations ⇒ Object



123
124
125
126
127
128
129
130
131
# File 'lib/linkedin_scraper/profile.rb', line 123

# Organizations listed under "#background-organizations .section-item",
# memoized in @organizations.
#
# Each entry is a hash with :name (whitespace-collapsed ".summary" text) and
# :start_date / :end_date, obtained by splitting the ".organizations-date"
# text on " – " and passing each half to Date.parse. Any step that raises
# leaves the corresponding field nil.
def organizations
  @organizations ||= @page.search("#background-organizations .section-item").map do |entry|
    title = (entry.at(".summary").text.gsub(/\s+|\n/, " ").strip rescue nil)
    raw_start, raw_end = (entry.at(".organizations-date").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil)

    {
      :name       => title,
      :start_date => (Date.parse(raw_start) rescue nil),
      :end_date   => (Date.parse(raw_end) rescue nil)
    }
  end
end

#past_companies ⇒ Object



88
89
90
# File 'lib/linkedin_scraper/profile.rb', line 88

# Entries from get_companies whose :end_date is anything other than
# "Present" (including entries with no :end_date). Memoized in @past_companies.
def past_companies
  return @past_companies if @past_companies

  @past_companies = get_companies.reject do |company|
    company[:end_date] == "Present"
  end
end

#picture ⇒ Object



80
81
82
# File 'lib/linkedin_scraper/profile.rb', line 80

# Address of the profile picture, memoized in @picture.
#
# Reads the '.profile-picture img' element and returns the stripped value of
# its 'src' attribute, or of 'data-delayed-url' when 'src' is absent.
# Returns nil when the page has no such img element.
def picture
  @picture ||= begin
    image = @page.at('.profile-picture img')
    if image
      # values_at yields nil for a missing attribute; compact keeps the first
      # one that is actually present.
      source = image.attributes.values_at('src', 'data-delayed-url').compact.first
      source.value.strip
    end
  end
end

#projects ⇒ Object



166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/linkedin_scraper/profile.rb', line 166

def projects
  @projects ||= @page.search("#projects .project").map do |project|
    p = {}
    start_date, end_date = project.at("date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil

    p[:title] = project.at(".item-title").text
    p[:link] =  CGI.parse(URI.parse(project.at(".item-title a")['href']).query)["url"][0] rescue nil
    p[:start_date] = parse_date(start_date) rescue nil
    p[:end_date] = parse_date(end_date)  rescue nil
    p[:description] = project.at(".description").text rescue nil
    p[:associates] = project.search(".contributors .contributor").map{ |c| c.at("a").text } rescue nil
    p
  end
end


153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/linkedin_scraper/profile.rb', line 153

# Profile cards found under ".insights .browse-map/ul/li.profile-card",
# memoized in @recommended_visitors.
#
# Each entry is a hash with :link (href of the first anchor) and :name (text
# of "h4/a"). When the card has a ".headline", its text -- with every "..."
# replaced by a space -- is split on " at ": the first piece becomes :title
# and the second :company. Cards without a headline carry neither key.
def recommended_visitors
  @recommended_visitors ||= @page.search(".insights .browse-map/ul/li.profile-card").map do |card|
    entry = { :link => card.at("a")["href"], :name => card.at("h4/a").text }

    if card.at(".headline")
      title_part   = card.at(".headline").text.gsub("...", " ").split(" at ").first
      company_part = card.at(".headline").text.gsub("...", " ").split(" at ")[1]
      entry[:title]   = title_part
      entry[:company] = company_part
    end

    entry
  end
end

#skills ⇒ Object



84
85
86
# File 'lib/linkedin_scraper/profile.rb', line 84

# Stripped text of every ".pills .skill" element, memoized in @skills.
# An element whose text is nil maps to nil. Returns nil if collecting the
# skills raises for any reason.
def skills
  @skills ||= begin
    @page.search(".pills .skill").map do |pill|
      pill.text.strip if pill.text
    end
  rescue
    nil
  end
end

#summary ⇒ Object



76
77
78
# File 'lib/linkedin_scraper/profile.rb', line 76

# Text of "#summary .description" with runs of whitespace collapsed.
# Memoized in @summary; nil when the page has no such element.
def summary
  @summary ||= begin
    description = @page.at("#summary .description")
    description.text.gsub(/\s+/, " ").strip if description
  end
end

#title ⇒ Object



56
57
58
# File 'lib/linkedin_scraper/profile.rb', line 56

# Text of the ".title" element with runs of whitespace collapsed.
# Memoized in @title; nil when the page has no ".title" element.
def title
  @title ||= begin
    headline = @page.at(".title")
    headline.text.gsub(/\s+/, " ").strip if headline
  end
end

#to_json ⇒ Object



181
182
183
184
# File 'lib/linkedin_scraper/profile.rb', line 181

# Serializes the profile as a JSON object keyed by each name in ATTRIBUTES;
# each value is obtained by sending that name to the instance.
#
# @param args generator arguments (e.g. a state object), forwarded unchanged
#   to Hash#to_json. Accepting them keeps this method compatible with the
#   to_json(state) calling convention; calling with no arguments behaves as
#   before.
# @return [String] the JSON text
def to_json(*args)
  require "json" # loaded lazily, on first serialization
  ATTRIBUTES.each_with_object({}) { |attr, fields| fields[attr.to_sym] = send(attr.to_sym) }.to_json(*args)
end

#websites ⇒ Object



108
109
110
111
112
113
# File 'lib/linkedin_scraper/profile.rb', line 108

def websites
  @websites ||= @page.search(".websites li").flat_map do |site|
    url = site.at("a")["href"]
    CGI.parse(URI.parse(url).query)["url"]
  end
end