Class: Linkedin::Profile
- Inherits:
-
Object
- Object
- Linkedin::Profile
- Defined in:
- lib/linkedin_scraper/profile.rb
Constant Summary collapse
- USER_AGENTS =
USER_AGENTS = [“Windows IE 6”, “Windows IE 7”, “Windows Mozilla”, “Mac Safari”, “Mac Firefox”, “Mac Mozilla”, “Linux Mozilla”, “Linux Firefox”, “Linux Konqueror”]
[ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:5.0) Gecko/20100101 Firefox/5.0", "Mozilla/5.0 (Windows NT 6.1.1; rv:5.0) Gecko/20100101 Firefox/5.0", "Mozilla/5.0 (X11; U; Linux i586; de; rv:5.0) Gecko/20100101 Firefox/5.0", "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.04 Chromium/14.0.825.0 Chrome/14.0.825.0 Safari/535.1", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.824.0 Safari/535.1", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:5.0) Gecko/20100101 Firefox/5.0", "Mozilla/5.0 (Macintosh; PPC MacOS X; rv:5.0) Gecko/20110615 Firefox/5.0", "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; Media Center PC 4.0; SLCC1; .NET CLR 3.0.04320)", "Mozilla/5.0 (Windows; U; MSIE 7.0; Windows NT 6.0; en-US)", "Mozilla/5.0 (compatible; Konqueror/4.5; FreeBSD) KHTML/4.5.4 (like Gecko)", "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00", "Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; da-dk) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1" ]
- ATTRIBUTES =
%w( name first_name last_name title location number_of_connections country industry summary picture projects linkedin_url education groups websites languages skills certifications organizations past_companies current_companies recommended_visitors)
Instance Attribute Summary collapse
-
#linkedin_url ⇒ Object
readonly
Returns the value of attribute linkedin_url.
-
#page ⇒ Object
readonly
Returns the value of attribute page.
Class Method Summary collapse
-
.get_profile(url, options = {}) ⇒ Object
Supports the old version.
Instance Method Summary collapse
- #certifications ⇒ Object
- #country ⇒ Object
- #current_companies ⇒ Object
- #education ⇒ Object
- #first_name ⇒ Object
- #groups ⇒ Object
- #industry ⇒ Object
-
#initialize(url, options = {}) ⇒ Profile
constructor
A new instance of Profile.
- #languages ⇒ Object
- #last_name ⇒ Object
- #location ⇒ Object
- #name ⇒ Object
- #number_of_connections ⇒ Object
- #organizations ⇒ Object
- #past_companies ⇒ Object
- #picture ⇒ Object
- #projects ⇒ Object
- #recommended_visitors ⇒ Object
- #skills ⇒ Object
- #summary ⇒ Object
- #title ⇒ Object
- #to_json ⇒ Object
- #websites ⇒ Object
Constructor Details
#initialize(url, options = {}) ⇒ Profile
Returns a new instance of Profile.
57 58 59 60 61 |
# File 'lib/linkedin_scraper/profile.rb', line 57
#
# Stores the profile URL and options, then fetches the page right away
# through +http_client+.
#
# NOTE(review): the extracted listing had lost the second parameter name and
# the right-hand side of the @options assignment; both are restored here from
# the documented signature "initialize(url, options = {})" -- confirm against
# the real source file.
#
# @param url [String] address passed to +http_client.get+ and kept as @linkedin_url
# @param options [Hash] kept in @options
# @return [Profile] a new instance of Profile
def initialize(url, options = {})
  @linkedin_url = url
  @options = options
  @page = http_client.get(url)
end
Instance Attribute Details
#linkedin_url ⇒ Object (readonly)
Returns the value of attribute linkedin_url.
48 49 50 |
# File 'lib/linkedin_scraper/profile.rb', line 48
#
# Read-only accessor.
#
# @return [Object] the value of attribute linkedin_url
def linkedin_url
  @linkedin_url
end
#page ⇒ Object (readonly)
Returns the value of attribute page.
48 49 50 |
# File 'lib/linkedin_scraper/profile.rb', line 48
#
# Read-only accessor.
#
# @return [Object] the value of attribute page
def page
  @page
end
Class Method Details
Instance Method Details
#certifications ⇒ Object
160 161 162 163 164 165 166 167 168 169 |
# File 'lib/linkedin_scraper/profile.rb', line 160
#
# Builds one hash per node matched by "background-certifications" and
# memoizes the list. Each value is the whitespace-collapsed text of a child
# node, or nil when that child is absent.
#
# NOTE(review): the selector has no "#" or "." prefix, unlike
# "#background-organizations" used by #organizations -- confirm it matches the
# intended markup.
# NOTE(review): the "authority" identifier was missing from the extracted
# listing and is restored from the :authority key.
#
# @return [Array<Hash>] hashes with :name, :authority, :license, :start_date
def certifications
  @certifications ||= @page.search("background-certifications").map do |item|
    # Collapses whitespace runs; passes nil through instead of rescuing errors.
    squish = lambda { |node| node && node.text.gsub(/\s+|\n/, " ").strip }

    {
      :name => squish.call(item.at("h4")),
      :authority => squish.call(item.at("h5")),
      :license => squish.call(item.at(".specifics/.licence-number")),
      :start_date => squish.call(item.at(".certification-date"))
    }
  end
end
#country ⇒ Object
87 88 89 |
# File 'lib/linkedin_scraper/profile.rb', line 87
#
# Stripped text after the last comma of the ".locality" node, memoized.
#
# @return [String, nil] nil when the node is missing or its text has no segment
def country
  @country ||= begin
    locality = @page.at(".locality")
    # split returns [] for blank text, so guard before stripping
    segment = locality && locality.text.split(",").last
    segment && segment.strip
  end
end
#current_companies ⇒ Object
111 112 113 |
# File 'lib/linkedin_scraper/profile.rb', line 111
#
# Entries returned by +get_companies+ whose :end_date equals "Present",
# memoized.
#
# @return [Array<Hash>]
def current_companies
  @current_companies ||= get_companies.select { |company| company[:end_date] == "Present" }
end
#education ⇒ Object
115 116 117 118 119 120 121 122 123 124 125 |
# File 'lib/linkedin_scraper/profile.rb', line 115
#
# Builds one hash per ".schools .school" node and memoizes the list.
# The description, degree and major are read from the last "h5" of the node;
# any part whose node is absent is nil.
#
# @return [Array<Hash>] hashes with :name, :description, :degree, :major,
#   :period, :start_date, :end_date
def education
  @education ||= @page.search(".schools .school").map do |item|
    # Collapses whitespace runs; passes nil through.
    squish = lambda { |node| node && node.text.gsub(/\s+|\n/, " ").strip }

    # A school without any h5 yields nil here; guard before descending into it.
    details = item.search("h5").last
    degree = squish.call(details && details.at(".degree"))
    period = squish.call(item.at(".date-range"))
    start_date, end_date = period.split(" – ") if period

    {
      :name => squish.call(item.at("h4")),
      :description => squish.call(details),
      :degree => (degree && degree.gsub(/,$/, "")),
      :major => squish.call(details && details.at(".major")),
      :period => period,
      :start_date => start_date,
      :end_date => end_date
    }
  end
end
#first_name ⇒ Object
67 68 69 |
# File 'lib/linkedin_scraper/profile.rb', line 67 def first_name @first_name ||= (@page.at(".fn").text.split(" ", 2)[0].strip if @page.at(".fn")) end |
#groups ⇒ Object
134 135 136 137 138 139 140 |
# File 'lib/linkedin_scraper/profile.rb', line 134
#
# Builds one hash per "#groups .group .item-title" node and memoizes the list.
#
# @return [Array<Hash>] hashes with :name (whitespace-collapsed text) and
#   :link (the "href" of the first anchor, or nil when there is no anchor)
def groups
  @groups ||= @page.search("#groups .group .item-title").map do |item|
    anchor = item.at("a")
    {
      :name => item.text.gsub(/\s+|\n/, " ").strip,
      :link => (anchor && anchor["href"])
    }
  end
end
#industry ⇒ Object
91 92 93 |
# File 'lib/linkedin_scraper/profile.rb', line 91
#
# Whitespace-collapsed text of the last "#demographics .descriptor" node,
# memoized.
#
# @return [String, nil] nil when no such node exists
def industry
  @industry ||= begin
    # One query serves both the presence check and the value.
    descriptor = @page.search("#demographics .descriptor").last
    descriptor && descriptor.text.gsub(/\s+/, " ").strip
  end
end
#languages ⇒ Object
152 153 154 155 156 157 158 |
# File 'lib/linkedin_scraper/profile.rb', line 152
#
# Builds one hash per ".background-languages #languages ol li" node and
# memoizes the list.
#
# @return [Array<Hash>] hashes with :language (raw "h4" text) and
#   :proficiency (whitespace-collapsed "div.languages-proficiency" text);
#   either is nil when its node is absent
def languages
  @languages ||= @page.search(".background-languages #languages ol li").map do |item|
    heading = item.at("h4")
    level = item.at("div.languages-proficiency")
    {
      :language => (heading && heading.text),
      :proficiency => (level && level.text.gsub(/\s+|\n/, " ").strip)
    }
  end
end
#last_name ⇒ Object
71 72 73 |
# File 'lib/linkedin_scraper/profile.rb', line 71
#
# Everything after the first whitespace-separated word of the ".fn" node's
# text, stripped and memoized.
#
# @return [String, nil] nil when the node is missing or the text has a single
#   word (or none)
def last_name
  @last_name ||= begin
    full_name = @page.at(".fn")
    # A single-word name has no second element; return nil instead of raising.
    remainder = full_name && full_name.text.split(" ", 2)[1]
    remainder && remainder.strip
  end
end
#location ⇒ Object
79 80 81 |
# File 'lib/linkedin_scraper/profile.rb', line 79
#
# Stripped text before the first comma of the ".locality" node, memoized.
#
# @return [String, nil] nil when the node is missing or its text has no segment
def location
  @location ||= begin
    locality = @page.at(".locality")
    # split returns [] for blank text, so guard before stripping
    segment = locality && locality.text.split(",").first
    segment && segment.strip
  end
end
#name ⇒ Object
63 64 65 |
# File 'lib/linkedin_scraper/profile.rb', line 63
#
# Joins +first_name+ and +last_name+ with a single space. A nil part is left
# out, so no leading or trailing space appears when only one part is present.
#
# @return [String] "" when both parts are nil
def name
  [first_name, last_name].compact.join(" ")
end
#number_of_connections ⇒ Object
83 84 85 |
# File 'lib/linkedin_scraper/profile.rb', line 83
#
# First run of digits, with an optional trailing "+", found in the text of
# the ".member-connections" node. Memoized in @connections.
#
# @return [String, nil] nil when the node is missing or its text has no digits
def number_of_connections
  @connections ||= begin
    badge = @page.at(".member-connections")
    # String#[] with a regexp returns nil on no match instead of raising.
    badge && badge.text[/[0-9]+[\+]{0,1}/]
  end
end
#organizations ⇒ Object
142 143 144 145 146 147 148 149 150 |
# File 'lib/linkedin_scraper/profile.rb', line 142
#
# Builds one hash per "#background-organizations .section-item" node and
# memoizes the list. Dates come from the ".organizations-date" text split on
# " – " and run through Date.parse; a missing or unparseable part is nil.
#
# @return [Array<Hash>] hashes with :name, :start_date, :end_date
def organizations
  @organizations ||= @page.search("#background-organizations .section-item").map do |item|
    heading = item.at(".summary")
    dates = item.at(".organizations-date")
    first_text, last_text = dates.text.gsub(/\s+|\n/, " ").strip.split(" – ") if dates

    # Only the "cannot parse this text" failure is swallowed.
    to_date = lambda do |text|
      begin
        text && Date.parse(text)
      rescue ArgumentError
        nil
      end
    end

    {
      :name => (heading && heading.text.gsub(/\s+|\n/, " ").strip),
      :start_date => to_date.call(first_text),
      :end_date => to_date.call(last_text)
    }
  end
end
#past_companies ⇒ Object
107 108 109 |
# File 'lib/linkedin_scraper/profile.rb', line 107
#
# Entries returned by +get_companies+ whose :end_date is anything other than
# "Present", memoized.
#
# @return [Array<Hash>]
def past_companies
  @past_companies ||= get_companies.reject { |company| company[:end_date] == "Present" }
end
#picture ⇒ Object
99 100 101 |
# File 'lib/linkedin_scraper/profile.rb', line 99
#
# Stripped value of the "src" attribute of the '.profile-picture img' node,
# falling back to "data-delayed-url" when "src" is absent. Memoized.
#
# @return [String, nil] nil when the node is missing or carries neither attribute
def picture
  @picture ||= begin
    image = @page.at('.profile-picture img')
    if image
      source = image.attributes.values_at('src', 'data-delayed-url').compact.first
      source && source.value.strip
    end
  end
end
#projects ⇒ Object
185 186 187 188 189 190 191 192 193 194 195 196 197 198 |
# File 'lib/linkedin_scraper/profile.rb', line 185
#
# Builds one hash per "#projects .project" node and memoizes the list.
# Every field is best-effort: a part that cannot be read is nil.
#
# NOTE(review): the date selector "date-range" has no "." prefix, unlike
# ".date-range" used by #education -- confirm it matches the intended markup.
#
# @return [Array<Hash>] hashes with :title, :link, :start_date, :end_date,
#   :description, :associates
def projects
  @projects ||= @page.search("#projects .project").map do |project|
    entry = {}

    range_node = project.at("date-range")
    start_date, end_date = range_node.text.gsub(/\s+|\n/, " ").strip.split(" – ") if range_node

    # parse_date is defined outside this listing; keep its failures non-fatal.
    to_date = lambda do |text|
      begin
        parse_date(text)
      rescue StandardError
        nil
      end
    end

    title_node = project.at(".item-title")
    entry[:title] = title_node && title_node.text

    # The link is the first "url" parameter of the title anchor's query string.
    entry[:link] = begin
      href = project.at(".item-title a")["href"]
      pair = URI.decode_www_form(URI.parse(href).query).find { |key, _value| key == "url" }
      pair && pair.last
    rescue StandardError
      nil
    end

    entry[:start_date] = to_date.call(start_date)
    entry[:end_date] = to_date.call(end_date)

    description = project.at(".description")
    entry[:description] = description && description.text

    # As before, one contributor without an anchor makes the whole list nil.
    entry[:associates] = begin
      project.search(".contributors .contributor").map { |contributor| contributor.at("a").text }
    rescue NoMethodError
      nil
    end

    entry
  end
end
#recommended_visitors ⇒ Object
172 173 174 175 176 177 178 179 180 181 182 183 |
# File 'lib/linkedin_scraper/profile.rb', line 172
#
# Builds one hash per ".insights .browse-map/ul/li.profile-card" node and
# memoizes the list. :title and :company are set only when the card has a
# ".headline" node; they are the parts of its text before and after " at ",
# with every "..." replaced by a space first.
#
# @return [Array<Hash>] hashes with :link, :name and, when a headline exists,
#   :title and :company
def recommended_visitors
  @recommended_visitors ||= @page.search(".insights .browse-map/ul/li.profile-card").map do |visitor|
    card = {}
    anchor = visitor.at("a")
    heading = visitor.at("h4/a")
    headline = visitor.at(".headline")

    # A card missing its anchor or heading yields nil values instead of raising.
    card[:link] = anchor && anchor["href"]
    card[:name] = heading && heading.text
    if headline
      role, employer = headline.text.gsub("...", " ").split(" at ")
      card[:title] = role
      card[:company] = employer
    end
    card
  end
end
#skills ⇒ Object
103 104 105 |
# File 'lib/linkedin_scraper/profile.rb', line 103
#
# Stripped text of every ".pills .skill" node, memoized. An entry is nil when
# a node's text is nil.
#
# @return [Array<String, nil>, nil] nil when the lookup raises a StandardError
def skills
  @skills ||= begin
    @page.search(".pills .skill").map { |skill| skill.text.strip if skill.text }
  rescue StandardError
    # Best-effort by design: a failed lookup yields nil rather than raising.
    nil
  end
end
#summary ⇒ Object
95 96 97 |
# File 'lib/linkedin_scraper/profile.rb', line 95
#
# Whitespace-collapsed text of the "#summary .description" node, memoized.
#
# @return [String, nil] nil when the node is missing
def summary
  @summary ||= begin
    # One query serves both the presence check and the value.
    description = @page.at("#summary .description")
    description && description.text.gsub(/\s+/, " ").strip
  end
end
#title ⇒ Object
75 76 77 |
# File 'lib/linkedin_scraper/profile.rb', line 75
#
# Whitespace-collapsed text of the ".title" node, memoized.
#
# @return [String, nil] nil when the node is missing
def title
  @title ||= begin
    # One query serves both the presence check and the value.
    heading = @page.at(".title")
    heading && heading.text.gsub(/\s+/, " ").strip
  end
end
#to_json ⇒ Object
200 201 202 203 |
# File 'lib/linkedin_scraper/profile.rb', line 200
#
# Serializes the profile: calls the reader named by each ATTRIBUTES entry,
# collects the results in a hash keyed by symbol, and converts it to JSON.
#
# @param args optional generator arguments, forwarded unchanged to
#   Hash#to_json so the method can also be called with a generator state
# @return [String] JSON text
def to_json(*args)
  # Loaded on first use, as in the original listing.
  require "json"
  ATTRIBUTES.each_with_object({}) { |attr, hash| hash[attr.to_sym] = send(attr.to_sym) }.to_json(*args)
end
#websites ⇒ Object
127 128 129 130 131 132 |
# File 'lib/linkedin_scraper/profile.rb', line 127
#
# Collects the "url" query parameter(s) of the first anchor inside every
# ".websites li" node into one flat, memoized list. A list item with no
# anchor, no "href" or no query string contributes nothing.
#
# @return [Array<String>] decoded "url" parameter values
# @raise [URI::InvalidURIError] when an href cannot be parsed as a URI
def websites
  @websites ||= @page.search(".websites li").flat_map do |site|
    anchor = site.at("a")
    href = anchor && anchor["href"]
    query = href && URI.parse(href).query

    if query
      # decode_www_form yields [key, value] pairs with percent-escapes decoded.
      URI.decode_www_form(query).select { |key, _value| key == "url" }.map { |_key, value| value }
    else
      []
    end
  end
end