Class: LinkedinData

Inherits:
Object
  • Object
show all
Includes:
GetRelated, Linkedin, ParseProfile
Defined in:
lib/linkedindata.rb

Instance Method Summary collapse

Methods included from ParseProfile

#addPersonFields, #deleteDuplicatePics, #parseResume

Methods included from GetRelated

#addPointsToProfile, #downloadRelated, #fullProfileList, #getList, #getRelatedProfiles, #relScore

Constructor Details

#initialize(todegree, proxylist, use_proxy, use_proxy_li) ⇒ LinkedinData

Returns a new instance of LinkedinData.



17
18
19
20
21
22
23
24
25
26
# File 'lib/linkedindata.rb', line 17

# Sets up the state shared by the search and scrape methods.
#
# todegree     - stored as @numhops
# proxylist    - path of the proxy list file; its lines are loaded into
#                @proxylist and the path itself is kept in @proxy_list_path
# use_proxy    - stored as @use_proxy (handed to GeneralScraper by #search)
# use_proxy_li - stored as @use_proxy_li (handed to
#                Linkedin::Profile.get_profile by #scrape)
#
# Raises a SystemCallError (e.g. Errno::ENOENT) when the proxy list file
# cannot be read.
def initialize(todegree, proxylist, use_proxy, use_proxy_li)
  # File.readlines, unlike IO.readlines, never interprets a path beginning
  # with "|" as a command to execute.
  @proxylist = File.readlines(proxylist)
  @proxy_list_path = proxylist
  @usedproxies = {}
  @output = []
  @startindex = 10
  @numhops = todegree
  @use_proxy = use_proxy
  @use_proxy_li = use_proxy_li
end

Instance Method Details

#gen_driver ⇒ Object

Generate driver for searches



39
40
41
42
43
# File 'lib/linkedindata.rb', line 39

# Generate driver for searches: creates a Firefox profile with its
# 'intl.accept_languages' preference set to 'en', starts a WebDriver with
# that profile, and stores the driver in @driver (which is also returned).
def gen_driver
  firefox_profile = Selenium::WebDriver::Firefox::Profile.new.tap do |prefs|
    prefs['intl.accept_languages'] = 'en'
  end
  @driver = Selenium::WebDriver.for(:firefox, profile: firefox_profile)
end

#getByKeywords(search_term) ⇒ Object

Gets all profiles in the search results and returns them as JSON



96
97
98
99
# File 'lib/linkedindata.rb', line 96

# Runs a search for the given term, then returns whatever prepareResults
# produces for the profiles collected along the way.
def getByKeywords(search_term)
  search(search_term)
  prepareResults
end

#getSingleProfile(url) ⇒ Object

Gets one profile and the related profiles



90
91
92
93
# File 'lib/linkedindata.rb', line 90

# Gets one profile and the related profiles.
#
# url - address of the profile to scrape (at hop count 0)
#
# Returns the value of prepareResults.
def getSingleProfile(url)
  # scrape hands @driver to the profile downloader and prepareResults closes
  # it, so make sure a driver exists; one set up by the caller is reused.
  gen_driver unless @driver
  scrape(url, 0)
  prepareResults
end

#prepareResults ⇒ Object

Gets related profiles then adds relevance scores and any missing keys



82
83
84
85
86
87
# File 'lib/linkedindata.rb', line 82

# Gets related profiles, removes duplicate pictures, closes the driver, and
# returns @output as pretty-printed JSON after showAllKeys and relScore have
# been applied to it.
#
# Errors raised while collecting related profiles still propagate to the
# caller, but the driver is closed first.
def prepareResults
  begin
    getRelatedProfiles
    deleteDuplicatePics
  ensure
    # Close the driver even when the steps above fail; skip when no driver
    # was ever created.
    @driver.close if @driver
  end

  JSON.pretty_generate(relScore(showAllKeys(@output)))
end

#scrape(url, curhops) ⇒ Object

Scrapes and parses individual profile



46
47
48
49
50
51
52
53
54
55
56
# File 'lib/linkedindata.rb', line 46

# Scrapes and parses an individual profile, appending the parsed entries to
# @output. Any StandardError raised along the way is reported on stderr and
# otherwise ignored, so the caller never sees it.
#
# url     - profile address; the caller's string is left unmodified
# curhops - hop count passed through to Linkedin::Profile.get_profile
#
# Returns @output when a profile was parsed and added, otherwise nil.
def scrape(url, curhops)
  # Build a rewritten copy instead of using gsub!: the in-place form changes
  # the caller's string and raises on frozen strings.
  http_url = url.gsub("https", "http")
  profile = Linkedin::Profile.get_profile(http_url, @driver, curhops, @proxylist, @usedproxies, @use_proxy_li)

  # Parse profile if returned and add to output
  @output.concat(parseResume(profile)) if profile
rescue StandardError => e
  warn "Could not scrape #{url}: #{e.class}: #{e.message}"
end

#search(search_terms) ⇒ Object

Searches for profiles on Google



29
30
31
32
33
34
35
36
# File 'lib/linkedindata.rb', line 29

# Builds a GeneralScraper for the "site:linkedin.com/pub" query combined with
# the given terms, starts a driver, then scrapes every URL the scraper
# returns (parsed from its JSON output) at hop count 0.
def search(search_terms)
  url_finder = GeneralScraper.new("site:linkedin.com/pub", search_terms, @proxy_list_path, @use_proxy)
  gen_driver

  profile_urls = JSON.parse(url_finder.getURLs)
  profile_urls.each { |profile_url| scrape(profile_url, 0) }
end

#showAllKeys(data) ⇒ Object

Makes sure every key that occurs in any item is present in every item (set to nil where missing)



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/linkedindata.rb', line 59

# Makes sure every key that occurs in any item is present in each item,
# with nil filled in where an item has no value for that key.
#
# data - collection of hash-like items (each must respond to keys and [])
#
# Returns a new Array of new Hashes; the input items are not modified.
def showAllKeys(data)
  # Union of the keys seen across all items
  fields = Set.new
  data.each { |item| fields.merge(item.keys) }

  # Rebuild each item over the full key set. Values are copied as-is, so a
  # stored false stays false rather than being replaced by nil.
  data.map do |item|
    fields.each_with_object({}) { |field, filled| filled[field] = item[field] }
  end
end