Class: Linkedin::Profile

Inherits:
Object
  • Object
show all
Defined in:
lib/linkedin_scraper/profile.rb

Constant Summary collapse

USER_AGENTS =

USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac Firefox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]

# Full User-Agent header strings covering Firefox, Chrome/Chromium, MSIE,
# Konqueror, Opera and Safari on Windows, Linux, Mac and FreeBSD.
# NOTE(review): how an entry is picked is not visible in this excerpt —
# presumably by the HTTP client setup; confirm against the caller.
[
  "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6",
  "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:5.0) Gecko/20100101 Firefox/5.0",
  "Mozilla/5.0 (Windows NT 6.1.1; rv:5.0) Gecko/20100101 Firefox/5.0",
  "Mozilla/5.0 (X11; U; Linux i586; de; rv:5.0) Gecko/20100101 Firefox/5.0",
  "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.04 Chromium/14.0.825.0 Chrome/14.0.825.0 Safari/535.1",
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.824.0 Safari/535.1",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:5.0) Gecko/20100101 Firefox/5.0",
  "Mozilla/5.0 (Macintosh; PPC MacOS X; rv:5.0) Gecko/20110615 Firefox/5.0",
  "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))",
  "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; Media Center PC 4.0; SLCC1; .NET CLR 3.0.04320)",
  "Mozilla/5.0 (Windows; U; MSIE 7.0; Windows NT 6.0; en-US)",
  "Mozilla/5.0 (compatible; Konqueror/4.5; FreeBSD) KHTML/4.5.4 (like Gecko)",
  "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
  "Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50",
  "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
  "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; da-dk) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1"
]
# Names of the profile reader methods, in the order #to_json serializes them.
ATTRIBUTES = %w[
  name first_name last_name title location number_of_connections
  country industry summary picture projects linkedin_url
  education groups websites languages skills certifications
  organizations past_companies current_companies recommended_visitors
]

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, options = {}) ⇒ Profile

Returns a new instance of Profile.



57
58
59
60
61
# File 'lib/linkedin_scraper/profile.rb', line 57

# Stores the URL and options, then fetches the page right away.
#
# @param url the profile URL; kept in @linkedin_url and passed to http_client.get
# @param options [Hash] kept in @options; not otherwise used in this method
# NOTE(review): http_client is defined outside this excerpt, so whatever it
# raises on a failed request will propagate out of the constructor — confirm.
def initialize(url, options = {})
  @linkedin_url = url
  @options = options
  # The fetch happens at construction time; every reader below works off @page.
  @page = http_client.get(url)
end

Instance Attribute Details

#linkedin_url ⇒ Object (readonly)

Returns the value of attribute linkedin_url.



48
49
50
# File 'lib/linkedin_scraper/profile.rb', line 48

# Returns the URL this profile was constructed with.
def linkedin_url
  @linkedin_url
end

#page ⇒ Object (readonly)

Returns the value of attribute page.



48
49
50
# File 'lib/linkedin_scraper/profile.rb', line 48

# Returns the page object fetched in the constructor; all scraping readers
# query this object.
def page
  @page
end

Class Method Details

.get_profile(url, options = {}) ⇒ Object

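Kept for compatibility with the old API. Builds a Profile for the given URL; if construction raises, the error is printed and nil is returned instead.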



51
52
53
54
55
# File 'lib/linkedin_scraper/profile.rb', line 51

# Legacy entry point: builds a Profile for +url+.
#
# Any StandardError raised while constructing the profile is printed with
# +puts+ and swallowed, in which case the method returns nil.
#
# @param url the profile URL, forwarded to Profile.new
# @param options [Hash] forwarded to Profile.new
# @return [Linkedin::Profile, nil]
def self.get_profile(url, options = {})
  begin
    Linkedin::Profile.new(url, options)
  rescue StandardError => error
    puts error
  end
end

Instance Method Details

#certifications ⇒ Object



160
161
162
163
164
165
166
167
168
169
# File 'lib/linkedin_scraper/profile.rb', line 160

# Certifications found on the page, memoized after the first call.
#
# Each entry is a Hash with :name, :authority, :license and :start_date.
# A field is nil whenever reading it raises (for example, the node is missing).
#
# NOTE(review): the selector "background-certifications" has no '#' or '.'
# prefix, unlike "#background-organizations" in #organizations, so it matches
# by tag name — confirm whether an id/class selector was intended.
#
# @return [Array<Hash>]
def certifications
  return @certifications if @certifications

  @certifications = @page.search("background-certifications").map do |node|
    # Collapse whitespace in the text of the first match; nil on any failure.
    squish = lambda do |selector|
      node.at(selector).text.gsub(/\s+|\n/, " ").strip rescue nil
    end

    {
      :name => squish.call("h4"),
      :authority => squish.call("h5"),
      :license => squish.call(".specifics/.licence-number"),
      :start_date => squish.call(".certification-date")
    }
  end
end

#country ⇒ Object



87
88
89
# File 'lib/linkedin_scraper/profile.rb', line 87

# Country portion of the ".locality" text: whatever follows the last comma,
# stripped. Memoized once a value has been found.
#
# Returns nil when the page has no ".locality" node or its text is blank
# (a blank text used to raise NoMethodError on nil).
#
# @return [String, nil]
def country
  return @country if @country

  locality = @page.at(".locality")
  return unless locality

  last_part = locality.text.split(",").last
  @country = last_part.strip if last_part
end

#current_companies ⇒ Object



111
112
113
# File 'lib/linkedin_scraper/profile.rb', line 111

# Companies from get_companies whose :end_date is exactly "Present".
# The result is memoized.
def current_companies
  return @current_companies if @current_companies

  @current_companies = get_companies.find_all do |company|
    company[:end_date] == "Present"
  end
end

#education ⇒ Object



115
116
117
118
119
120
121
122
123
124
125
# File 'lib/linkedin_scraper/profile.rb', line 115

# Schools listed under ".schools .school", memoized after the first call.
#
# Each entry is a Hash with :name, :description, :degree, :major, :period,
# :start_date and :end_date. Any field whose node is missing is nil.
#
# :start_date / :end_date are the two halves of the ".date-range" text, split
# on an en/em dash or on a hyphen surrounded by spaces. The former
# `split("")` broke the text into single characters, so these fields only ever
# held the first two characters of the period.
#
# @return [Array<Hash>]
def education
  @education ||= @page.search(".schools .school").map do |item|
    # Whitespace-normalised text of a node, or nil when the node is absent.
    squish = lambda { |node| node.text.gsub(/\s+/, " ").strip if node }

    # The last <h5> holds degree/major; it may be absent, so guard every lookup.
    details = item.search("h5").last
    degree = squish.call(details && details.at(".degree"))
    period = squish.call(item.at(".date-range"))
    start_date, end_date = period.split(/\s*[\u2013\u2014]\s*|\s+-\s+/, 2) if period

    {
      :name => squish.call(item.at("h4")),
      :description => squish.call(details),
      :degree => degree && degree.gsub(/,$/, ""),
      :major => squish.call(details && details.at(".major")),
      :period => period,
      :start_date => start_date,
      :end_date => end_date
    }
  end
end

#first_name ⇒ Object



67
68
69
# File 'lib/linkedin_scraper/profile.rb', line 67

def first_name
  @first_name ||= (@page.at(".fn").text.split(" ", 2)[0].strip if @page.at(".fn"))
end

#groups ⇒ Object



134
135
136
137
138
139
140
# File 'lib/linkedin_scraper/profile.rb', line 134

# Groups listed under "#groups .group .item-title", memoized.
#
# Each entry is a Hash with :name (whitespace-normalised title text) and
# :link (the href of the first anchor inside the title).
#
# @return [Array<Hash>]
def groups
  return @groups if @groups

  titles = @page.search("#groups .group .item-title")
  @groups = titles.map do |title_node|
    {
      :name => title_node.text.gsub(/\s+|\n/, " ").strip,
      :link => title_node.at("a")['href']
    }
  end
end

#industry ⇒ Object



91
92
93
# File 'lib/linkedin_scraper/profile.rb', line 91

# Whitespace-normalised text of the last "#demographics .descriptor" node,
# or nil when the page has none. Memoized once found.
#
# @return [String, nil]
def industry
  return @industry if @industry
  return unless @page.at("#demographics .descriptor")

  descriptor = @page.search("#demographics .descriptor")[-1]
  @industry = descriptor.text.gsub(/\s+/, " ").strip
end

#languages ⇒ Object



152
153
154
155
156
157
158
# File 'lib/linkedin_scraper/profile.rb', line 152

# Languages listed under ".background-languages #languages ol li", memoized.
#
# Each entry is a Hash with :language (raw <h4> text) and :proficiency
# (whitespace-normalised). Either value is nil when reading it raises.
#
# @return [Array<Hash>]
def languages
  return @languages if @languages

  entries = @page.search(".background-languages #languages ol li")
  @languages = entries.map do |entry|
    spoken = begin
      entry.at("h4").text
    rescue StandardError
      nil
    end
    level = begin
      entry.at("div.languages-proficiency").text.gsub(/\s+|\n/, " ").strip
    rescue StandardError
      nil
    end

    { :language => spoken, :proficiency => level }
  end
end

#last_name ⇒ Object



71
72
73
# File 'lib/linkedin_scraper/profile.rb', line 71

# Everything after the first whitespace-separated word of the ".fn" text,
# stripped. Memoized once found.
#
# Returns nil when there is no ".fn" node or the name is a single word
# (a single-word name used to raise NoMethodError on nil).
#
# @return [String, nil]
def last_name
  return @last_name if @last_name

  full_name = @page.at(".fn")
  return unless full_name

  rest = full_name.text.split(" ", 2)[1]
  @last_name = rest.strip if rest
end

#location ⇒ Object



79
80
81
# File 'lib/linkedin_scraper/profile.rb', line 79

# Leading portion of the ".locality" text: whatever precedes the first comma,
# stripped. Memoized once found.
#
# Returns nil when the page has no ".locality" node or its text is blank
# (a blank text used to raise NoMethodError on nil).
#
# @return [String, nil]
def location
  return @location if @location

  locality = @page.at(".locality")
  return unless locality

  first_part = locality.text.split(",").first
  @location = first_part.strip if first_part
end

#name ⇒ Object



63
64
65
# File 'lib/linkedin_scraper/profile.rb', line 63

# Full name built from #first_name and #last_name, joined by one space.
#
# Missing parts are skipped, so a profile with only a first name yields
# "Ada" rather than "Ada " and a profile with no name yields "" rather
# than " ".
#
# @return [String]
def name
  [first_name, last_name].compact.join(" ")
end

#number_of_connections ⇒ Object



83
84
85
# File 'lib/linkedin_scraper/profile.rb', line 83

# Connection count as shown on the page: the first run of digits in the
# ".member-connections" text with an optional trailing "+", e.g. "500+".
# Memoized in @connections once found.
#
# Returns nil when the node is missing or its text contains no digits
# (digit-less text used to raise NoMethodError on nil).
#
# @return [String, nil]
def number_of_connections
  return @connections if @connections

  badge = @page.at(".member-connections")
  return unless badge

  found = badge.text.match(/[0-9]+\+?/)
  @connections = found[0] if found
end

#organizations ⇒ Object



142
143
144
145
146
147
148
149
150
# File 'lib/linkedin_scraper/profile.rb', line 142

# Organizations listed under "#background-organizations .section-item",
# memoized after the first call.
#
# Each entry is a Hash with :name, :start_date and :end_date. The dates are
# Date objects parsed from the two halves of the ".organizations-date" text,
# split on an en/em dash or a hyphen surrounded by spaces. A date is nil when
# its half is missing or cannot be parsed (e.g. "Present").
#
# The former `split("")` produced single characters, so Date.parse never
# received a usable string.
#
# @return [Array<Hash>]
def organizations
  @organizations ||= @page.search("#background-organizations .section-item").map do |item|
    # Best-effort parse: nil for missing or unparseable text.
    to_date = lambda do |text|
      begin
        Date.parse(text) if text
      rescue StandardError
        nil
      end
    end

    summary = item.at(".summary")
    dates = item.at(".organizations-date")
    first, last = dates.text.gsub(/\s+/, " ").strip.split(/\s*[\u2013\u2014]\s*|\s+-\s+/, 2) if dates

    {
      :name => (summary.text.gsub(/\s+/, " ").strip if summary),
      :start_date => to_date.call(first),
      :end_date => to_date.call(last)
    }
  end
end

#past_companies ⇒ Object



107
108
109
# File 'lib/linkedin_scraper/profile.rb', line 107

# Companies from get_companies whose :end_date is anything other than
# "Present". The result is memoized.
def past_companies
  return @past_companies if @past_companies

  @past_companies = get_companies.reject do |company|
    company[:end_date] == "Present"
  end
end

#picture ⇒ Object



99
100
101
# File 'lib/linkedin_scraper/profile.rb', line 99

# URL of the profile picture: the stripped value of the image's "src"
# attribute, falling back to "data-delayed-url". Memoized once found.
#
# Returns nil when there is no '.profile-picture img' node or the image
# carries neither attribute (the latter used to raise NoMethodError on nil).
#
# @return [String, nil]
def picture
  return @picture if @picture

  image = @page.at('.profile-picture img')
  return unless image

  source = image.attributes.values_at('src', 'data-delayed-url').compact.first
  @picture = source.value.strip if source
end

#projects ⇒ Object



185
186
187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/linkedin_scraper/profile.rb', line 185

# Projects listed under "#projects .project", memoized after the first call.
#
# Each entry is a Hash with :title, :link, :start_date, :end_date,
# :description and :associates. Everything except :title is best-effort and
# nil when it cannot be read.
#
# Changes from the previous version:
# * the date range is split on an en/em dash or a spaced hyphen; `split("")`
#   used to yield single characters;
# * the date node is looked up as ".date-range" (class), matching #education.
#   NOTE(review): the old selector "date-range" matched by tag name — confirm
#   the class selector against the live markup;
# * the "url" query parameter is read with URI.decode_www_form;
# * the accumulator no longer shadows Kernel#p.
#
# parse_date is defined outside this excerpt; its failures are swallowed here
# exactly as before.
#
# @return [Array<Hash>]
def projects
  @projects ||= @page.search("#projects .project").map do |project|
    range = project.at(".date-range")
    start_date, end_date = range.text.gsub(/\s+/, " ").strip.split(/\s*[\u2013\u2014]\s*|\s+-\s+/, 2) if range

    # The title link is a redirect whose real target sits in its "url" param.
    link = begin
      href = project.at(".item-title a")['href']
      target = URI.decode_www_form(URI.parse(href).query.to_s).find { |key, _| key == "url" }
      target && target.last
    rescue StandardError
      nil
    end

    entry = {}
    entry[:title] = project.at(".item-title").text
    entry[:link] = link
    entry[:start_date] = parse_date(start_date) rescue nil
    entry[:end_date] = parse_date(end_date) rescue nil
    entry[:description] = project.at(".description").text rescue nil
    entry[:associates] = project.search(".contributors .contributor").map { |c| c.at("a").text } rescue nil
    entry
  end
end


172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/linkedin_scraper/profile.rb', line 172

# Profile cards found under ".insights .browse-map/ul/li.profile-card",
# memoized after the first call.
#
# Each entry is a Hash with :link and :name. When the card has a ".headline",
# its text (with "..." replaced by a space) is split on " at " and the first
# two pieces are stored as :title and :company.
#
# @return [Array<Hash>]
def recommended_visitors
  return @recommended_visitors if @recommended_visitors

  cards = @page.search(".insights .browse-map/ul/li.profile-card")
  @recommended_visitors = cards.map do |card|
    entry = {}
    entry[:link] = card.at("a")["href"]
    entry[:name] = card.at("h4/a").text

    headline = card.at(".headline")
    if headline
      pieces = headline.text.gsub("...", " ").split(" at ")
      entry[:title] = pieces.first
      entry[:company] = pieces[1]
    end

    entry
  end
end

#skills ⇒ Object



103
104
105
# File 'lib/linkedin_scraper/profile.rb', line 103

# Stripped text of every ".pills .skill" node, memoized once found.
#
# A node whose text is nil contributes nil. If the lookup raises a
# StandardError, the whole result is nil.
#
# @return [Array<String, nil>, nil]
def skills
  return @skills if @skills

  @skills = begin
    @page.search(".pills .skill").map do |pill|
      pill.text.strip if pill.text
    end
  rescue StandardError
    nil
  end
end

#summary ⇒ Object



95
96
97
# File 'lib/linkedin_scraper/profile.rb', line 95

# Whitespace-normalised text of the "#summary .description" node, or nil when
# the page has none. Memoized once found.
#
# @return [String, nil]
def summary
  return @summary if @summary

  description = @page.at("#summary .description")
  @summary = description.text.gsub(/\s+/, " ").strip if description
end

#title ⇒ Object



75
76
77
# File 'lib/linkedin_scraper/profile.rb', line 75

# Whitespace-normalised text of the ".title" node, or nil when the page has
# none. Memoized once found.
#
# @return [String, nil]
def title
  return @title if @title

  heading = @page.at(".title")
  @title = heading.text.gsub(/\s+/, " ").strip if heading
end

#to_json ⇒ Object



200
201
202
203
# File 'lib/linkedin_scraper/profile.rb', line 200

# Serializes every attribute named in ATTRIBUTES into a JSON object string.
#
# Accepts and forwards optional generator arguments. The JSON library calls
# +to_json(state)+ on objects nested inside arrays or hashes, which the
# previous zero-argument signature rejected with ArgumentError. Calling it
# with no arguments behaves as before.
#
# @param args optional generator state/options, passed through to Hash#to_json
# @return [String]
def to_json(*args)
  # Loaded lazily so the json library is only needed when serializing.
  require "json"
  ATTRIBUTES.each_with_object({}) { |attr, hash| hash[attr.to_sym] = send(attr) }.to_json(*args)
end

#websites ⇒ Object



127
128
129
130
131
132
# File 'lib/linkedin_scraper/profile.rb', line 127

# Target URLs of the links under ".websites li", memoized after the first call.
#
# Each link is expected to be a redirect carrying the real address in its
# "url" query parameter; every value of that parameter is collected.
#
# List items with no anchor, no href, an unparseable href or no "url"
# parameter are skipped. Previously the first two cases raised and aborted the
# whole list.
#
# @return [Array<String>]
def websites
  @websites ||= @page.search(".websites li").flat_map do |site|
    anchor = site.at("a")
    href = anchor && anchor["href"]
    next [] unless href

    begin
      pairs = URI.decode_www_form(URI.parse(href).query.to_s)
    rescue URI::InvalidURIError, ArgumentError
      next []
    end

    pairs.select { |key, _| key == "url" }.map(&:last)
  end
end