Class: Linkedin::Profile
- Inherits:
-
Object
- Object
- Linkedin::Profile
- Defined in:
- lib/linkedin_scraper/profile.rb
Constant Summary collapse
- USER_AGENTS =
["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac Firefox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
- ATTRIBUTES =
%w( name first_name last_name title location number_of_connections country industry summary picture projects linkedin_url education groups websites languages skills certifications organizations past_companies current_companies recommended_visitors)
Instance Attribute Summary collapse
-
#linkedin_url ⇒ Object
readonly
Returns the value of attribute linkedin_url.
-
#page ⇒ Object
readonly
Returns the value of attribute page.
Class Method Summary collapse
- .get_profile(url, options = {}) ⇒ Object
Instance Method Summary collapse
- #certifications ⇒ Object
- #country ⇒ Object
- #current_companies ⇒ Object
- #education ⇒ Object
- #first_name ⇒ Object
- #groups ⇒ Object
- #industry ⇒ Object
-
#initialize(url, options = {}) ⇒ Profile
constructor
A new instance of Profile.
- #languages ⇒ Object
- #last_name ⇒ Object
- #location ⇒ Object
- #name ⇒ Object
- #number_of_connections ⇒ Object
- #organizations ⇒ Object
- #past_companies ⇒ Object
- #picture ⇒ Object
- #projects ⇒ Object
- #recommended_visitors ⇒ Object
- #skills ⇒ Object
- #summary ⇒ Object
- #title ⇒ Object
- #to_json ⇒ Object
- #websites ⇒ Object
Constructor Details
#initialize(url, options = {}) ⇒ Profile
Returns a new instance of Profile.
38 39 40 41 42 |
# File 'lib/linkedin_scraper/profile.rb', line 38
#
# Stores the URL and options, then fetches the page through +http_client+
# (a helper that is not part of this listing).
#
# @param url [String] passed to +http_client.get+ and exposed via #linkedin_url
# @param options [Hash] optional settings kept on the instance
def initialize(url, options = {})
  @linkedin_url = url
  # NOTE(review): this identifier was missing from the rendered listing; the
  # name `@options` is reconstructed from the `options = {}` parameter shown
  # in the signature — confirm against the real source file.
  @options = options
  @page = http_client.get(url)
end
Instance Attribute Details
#linkedin_url ⇒ Object (readonly)
Returns the value of attribute linkedin_url.
30 31 32 |
# File 'lib/linkedin_scraper/profile.rb', line 30
#
# Read-only accessor.
#
# @return [Object] whatever is currently stored in @linkedin_url
def linkedin_url
  @linkedin_url
end
#page ⇒ Object (readonly)
Returns the value of attribute page.
30 31 32 |
# File 'lib/linkedin_scraper/profile.rb', line 30
#
# Read-only accessor.
#
# @return [Object] whatever is currently stored in @page
def page
  @page
end
Class Method Details
.get_profile(url, options = {}) ⇒ Object
32 33 34 35 36 |
# File 'lib/linkedin_scraper/profile.rb', line 32
#
# Builds a Linkedin::Profile without letting a StandardError escape.
#
# @param url [String] forwarded to Linkedin::Profile.new
# @param options [Hash] forwarded to Linkedin::Profile.new
# @return [Linkedin::Profile, nil] the new profile, or nil when construction raised
def self.get_profile(url, options = {})
  Linkedin::Profile.new(url, options)
rescue => e
  # Best-effort by design: print the error and return nil (the result of puts).
  puts e
end
Instance Method Details
#certifications ⇒ Object
141 142 143 144 145 146 147 148 149 150 |
# File 'lib/linkedin_scraper/profile.rb', line 141
#
# Collects one hash per certification element found on the page.
# Each field is best-effort: if its element cannot be read, the field is nil.
#
# @return [Array<Hash>] hashes with :name, :authority, :license and :start_date
def certifications
  # Normalised text of the first match for `selector`, or nil on any failure.
  text_of = lambda do |node, selector|
    begin
      node.at(selector).text.gsub(/\s+|\n/, " ").strip
    rescue StandardError
      nil
    end
  end

  # NOTE(review): unlike "#background-organizations" in #organizations, this
  # selector has no "#" or "." prefix, so it is an element-name selector —
  # confirm that is intended.
  @certifications ||= @page.search("background-certifications").map do |item|
    {
      :name => text_of.call(item, "h4"),
      :authority => text_of.call(item, "h5"),
      :license => text_of.call(item, ".specifics/.licence-number"),
      :start_date => text_of.call(item, ".certification-date")
    }
  end
end
#country ⇒ Object
68 69 70 |
# File 'lib/linkedin_scraper/profile.rb', line 68
#
# Last comma-separated segment of the ".locality" element's text.
#
# @return [String, nil] the stripped segment; nil when the element is missing
#   or its text has no segments (e.g. it is empty)
def country
  @country ||= begin
    locality = @page.at(".locality")
    # split can return [], so the last segment may be nil
    segment = locality && locality.text.split(",").last
    segment && segment.strip
  end
end
#current_companies ⇒ Object
92 93 94 |
# File 'lib/linkedin_scraper/profile.rb', line 92
#
# Entries from get_companies whose :end_date equals "Present".
#
# @return [Array<Hash>] memoized in @current_companies
def current_companies
  @current_companies ||= get_companies.select { |company| company[:end_date] == "Present" }
end
#education ⇒ Object
96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/linkedin_scraper/profile.rb', line 96
#
# Collects one hash per ".schools .school" element. Any field whose element
# is absent is nil; a school without an <h5> no longer raises.
#
# @return [Array<Hash>] hashes with :name, :description, :degree, :major,
#   :period, :start_date and :end_date
def education
  @education ||= @page.search(".schools .school").map do |item|
    # Whitespace-normalised text of a node, or nil when the node is missing.
    clean = lambda { |node| node && node.text.gsub(/\s+|\n/, " ").strip }

    detail = item.search("h5").last
    degree = clean.call(detail && detail.at(".degree"))
    period = clean.call(item.at(".date-range"))
    # The period is split on an en dash surrounded by spaces.
    start_date, end_date = period.split(" – ") if period

    {
      :name => clean.call(item.at("h4")),
      :description => clean.call(detail),
      :degree => degree && degree.gsub(/,$/, ""), # drop a trailing comma
      :major => clean.call(detail && detail.at(".major")),
      :period => period,
      :start_date => start_date,
      :end_date => end_date
    }
  end
end
#first_name ⇒ Object
48 49 50 |
# File 'lib/linkedin_scraper/profile.rb', line 48
#
# First whitespace-separated word of the ".fn" element's text.
#
# @return [String, nil] nil when the element is missing or its text is blank
def first_name
  @first_name ||= begin
    full_name = @page.at(".fn")
    # split returns [] for blank text, so the first part may be nil
    given = full_name && full_name.text.split(" ", 2)[0]
    given && given.strip
  end
end
#groups ⇒ Object
115 116 117 118 119 120 121 |
# File 'lib/linkedin_scraper/profile.rb', line 115
#
# Collects one hash per "#groups .group .item-title" element.
#
# @return [Array<Hash>] hashes with :name (normalised text) and :link (the
#   href of the first <a>, or nil when the title contains no anchor)
def groups
  @groups ||= @page.search("#groups .group .item-title").map do |item|
    anchor = item.at("a")
    { :name => item.text.gsub(/\s+|\n/, " ").strip, :link => anchor && anchor["href"] }
  end
end
#industry ⇒ Object
72 73 74 |
# File 'lib/linkedin_scraper/profile.rb', line 72
#
# Normalised text of the last "#demographics .descriptor" element.
#
# @return [String, nil] nil when no such element exists
def industry
  return @industry if @industry
  return @industry = nil unless @page.at("#demographics .descriptor")

  descriptor = @page.search("#demographics .descriptor")[-1]
  @industry = descriptor.text.gsub(/\s+/, " ").strip
end
#languages ⇒ Object
133 134 135 136 137 138 139 |
# File 'lib/linkedin_scraper/profile.rb', line 133
#
# Collects one hash per ".background-languages #languages ol li" element.
# Each field falls back to nil if reading it raises.
#
# @return [Array<Hash>] hashes with :language (raw <h4> text) and
#   :proficiency (whitespace-normalised text)
def languages
  @languages ||= @page.search(".background-languages #languages ol li").map do |entry|
    spoken =
      begin
        entry.at("h4").text
      rescue StandardError
        nil
      end
    level =
      begin
        entry.at("div.languages-proficiency").text.gsub(/\s+|\n/, " ").strip
      rescue StandardError
        nil
      end
    { :language => spoken, :proficiency => level }
  end
end
#last_name ⇒ Object
52 53 54 |
# File 'lib/linkedin_scraper/profile.rb', line 52
#
# Everything after the first whitespace-separated word of the ".fn" text.
#
# @return [String, nil] nil when the element is missing or the text holds
#   fewer than two words (a single-word name no longer raises)
def last_name
  @last_name ||= begin
    full_name = @page.at(".fn")
    # with a limit of 2, the second part is nil for a one-word name
    family = full_name && full_name.text.split(" ", 2)[1]
    family && family.strip
  end
end
#location ⇒ Object
60 61 62 |
# File 'lib/linkedin_scraper/profile.rb', line 60
#
# First comma-separated segment of the ".locality" element's text.
#
# @return [String, nil] the stripped segment; nil when the element is missing
#   or its text has no segments (e.g. it is empty)
def location
  @location ||= begin
    locality = @page.at(".locality")
    # split can return [], so the first segment may be nil
    segment = locality && locality.text.split(",").first
    segment && segment.strip
  end
end
#name ⇒ Object
44 45 46 |
# File 'lib/linkedin_scraper/profile.rb', line 44
#
# Full name built from #first_name and #last_name.
#
# @return [String] the available parts joined by one space; a missing part
#   no longer leaves a stray leading or trailing space, and the result is ""
#   when both parts are nil
def name
  [first_name, last_name].compact.join(" ")
end
#number_of_connections ⇒ Object
64 65 66 |
# File 'lib/linkedin_scraper/profile.rb', line 64
#
# First run of digits (with an optional trailing "+") in the text of the
# ".member-connections" element.
#
# @return [String, nil] e.g. "500+"; nil when the element is missing or its
#   text contains no digits
def number_of_connections
  @connections ||= begin
    badge = @page.at(".member-connections")
    # String#[] with a regexp returns nil instead of raising when nothing matches
    badge && badge.text[/[0-9]+\+?/]
  end
end
#organizations ⇒ Object
123 124 125 126 127 128 129 130 131 |
# File 'lib/linkedin_scraper/profile.rb', line 123
#
# Collects one hash per "#background-organizations .section-item" element.
# Every field is best-effort and becomes nil if it cannot be read or parsed.
#
# @return [Array<Hash>] hashes with :name (String or nil) and :start_date /
#   :end_date (Date or nil)
def organizations
  # Date.parse result, or nil when the input cannot be parsed.
  to_date = lambda do |raw|
    begin
      Date.parse(raw)
    rescue StandardError
      nil
    end
  end

  @organizations ||= @page.search("#background-organizations .section-item").map do |item|
    label =
      begin
        item.at(".summary").text.gsub(/\s+|\n/, " ").strip
      rescue StandardError
        nil
      end
    # The date text is split on an en dash surrounded by spaces.
    span =
      begin
        item.at(".organizations-date").text.gsub(/\s+|\n/, " ").strip.split(" – ")
      rescue StandardError
        nil
      end
    first_raw, last_raw = span

    { :name => label, :start_date => to_date.call(first_raw), :end_date => to_date.call(last_raw) }
  end
end
#past_companies ⇒ Object
88 89 90 |
# File 'lib/linkedin_scraper/profile.rb', line 88
#
# Entries from get_companies whose :end_date is not "Present".
#
# @return [Array<Hash>] memoized in @past_companies
def past_companies
  @past_companies ||= get_companies.reject { |company| company[:end_date] == "Present" }
end
#picture ⇒ Object
80 81 82 |
# File 'lib/linkedin_scraper/profile.rb', line 80
#
# Picture URL read from the '.profile-picture img' element, preferring the
# 'src' attribute over 'data-delayed-url'.
#
# @return [String, nil] the stripped attribute value; nil when the element
#   is missing or carries neither attribute
def picture
  @picture ||= begin
    image = @page.at('.profile-picture img')
    # values_at yields nil for absent attributes, so the list may compact to empty
    source = image && image.attributes.values_at('src', 'data-delayed-url').compact.first
    source && source.value.strip
  end
end
#projects ⇒ Object
166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
# File 'lib/linkedin_scraper/profile.rb', line 166
#
# Collects one hash per "#projects .project" element. Every field except
# :title is best-effort and becomes nil if it cannot be read.
#
# @return [Array<Hash>] hashes with :title, :link, :start_date, :end_date,
#   :description and :associates
def projects
  @projects ||= @page.search("#projects .project").map do |project|
    # NOTE(review): the listing used "date-range" (an element-name selector);
    # ".date-range" matches the class selector used by #education — confirm
    # against the real markup.
    date_parts =
      begin
        project.at(".date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ")
      rescue StandardError
        nil
      end
    start_date, end_date = date_parts

    # The link is the "url" query parameter of the title anchor's href.
    link =
      begin
        query = URI.parse(project.at(".item-title a")['href']).query
        pair = URI.decode_www_form(query).assoc("url")
        pair && pair.last
      rescue StandardError
        nil
      end

    entry = {} # named `entry` rather than `p` to avoid shadowing Kernel#p
    entry[:title] = project.at(".item-title").text
    entry[:link] = link
    # parse_date is a helper that is not part of this listing.
    entry[:start_date] = (parse_date(start_date) rescue nil)
    entry[:end_date] = (parse_date(end_date) rescue nil)
    entry[:description] = (project.at(".description").text rescue nil)
    entry[:associates] = (project.search(".contributors .contributor").map { |c| c.at("a").text } rescue nil)
    entry
  end
end
#recommended_visitors ⇒ Object
153 154 155 156 157 158 159 160 161 162 163 164 |
# File 'lib/linkedin_scraper/profile.rb', line 153
#
# Collects one hash per ".insights .browse-map/ul/li.profile-card" element.
#
# @return [Array<Hash>] hashes with :link and :name; :title and :company are
#   added only when the card has a ".headline" element, taken from the parts
#   of the headline text before and after the first " at "
def recommended_visitors
  @recommended_visitors ||= @page.search(".insights .browse-map/ul/li.profile-card").map do |card|
    entry = { :link => card.at("a")["href"], :name => card.at("h4/a").text }

    headline = card.at(".headline")
    if headline
      role, employer = headline.text.gsub("...", " ").split(" at ")
      entry[:title] = role
      entry[:company] = employer
    end

    entry
  end
end
#skills ⇒ Object
84 85 86 |
# File 'lib/linkedin_scraper/profile.rb', line 84
#
# Stripped text of every ".pills .skill" element.
#
# @return [Array<String>, nil] nil if collecting the skills raises
def skills
  return @skills if @skills

  @skills =
    begin
      @page.search(".pills .skill").map do |skill|
        label = skill.text
        label ? label.strip : nil
      end
    rescue StandardError
      nil
    end
end
#summary ⇒ Object
76 77 78 |
# File 'lib/linkedin_scraper/profile.rb', line 76
#
# Whitespace-normalised text of the "#summary .description" element.
#
# @return [String, nil] nil when the element is missing
def summary
  return @summary if @summary

  blurb = @page.at("#summary .description")
  @summary = blurb ? blurb.text.gsub(/\s+/, " ").strip : nil
end
#title ⇒ Object
56 57 58 |
# File 'lib/linkedin_scraper/profile.rb', line 56
#
# Whitespace-normalised text of the ".title" element.
#
# @return [String, nil] nil when the element is missing
def title
  return @title if @title

  heading = @page.at(".title")
  @title = heading ? heading.text.gsub(/\s+/, " ").strip : nil
end
#to_json ⇒ Object
181 182 183 184 |
# File 'lib/linkedin_scraper/profile.rb', line 181
#
# Serialises every attribute named in ATTRIBUTES to a JSON object.
#
# @param args [Array] optional generator arguments, passed through to
#   Hash#to_json so the method also works when a JSON generator calls it
#   with a state object
# @return [String] JSON text keyed by attribute name
def to_json(*args)
  require "json" # loaded on first use, as in the original listing
  ATTRIBUTES.each_with_object({}) { |attr, hash| hash[attr.to_sym] = send(attr.to_sym) }.to_json(*args)
end
#websites ⇒ Object
108 109 110 111 112 113 |
# File 'lib/linkedin_scraper/profile.rb', line 108
#
# Collects the "url" query parameter of each ".websites li" anchor.
# Entries without an anchor, or whose href has no query string, contribute
# nothing instead of raising.
#
# @return [Array<String>] decoded "url" parameter values, in page order
# @raise [URI::InvalidURIError] when an href cannot be parsed as a URI
def websites
  @websites ||= @page.search(".websites li").flat_map do |site|
    anchor = site.at("a")
    href = anchor && anchor["href"]
    query = href && URI.parse(href).query
    next [] unless query

    URI.decode_www_form(query).select { |key, _value| key == "url" }.map(&:last)
  end
end