Class: Linsc

Inherits:
Object
  • Object
show all
Includes:
CSVHandlers
Defined in:
lib/linsc.rb

Instance Method Summary collapse

Methods included from CSVHandlers

#append_to_csv, #create_file, #create_file_with_headers, #create_row, #get_headers

Constructor Details

#initialize ⇒ Linsc

Returns a new instance of Linsc.



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/linsc.rb', line 78

# Sets up paths and options, parses the command line, validates the
# Salesforce reference sheet, then runs the merge / crossref / duck / lin steps.
#
# Command-line handlers run while the arguments are being parsed, so
# --history, --restart and --help take effect (and may exit) before any of the
# validation below happens.
def initialize
  @options = { noproxy: false, update: false, insert: false }
  @working_dir = Pathname.pwd
  @merge_path = @working_dir + 'merged.csv'
  @sf_path = @working_dir + 'sf_ref.csv'
  @crossref_path = @working_dir + 'crossref.csv'
  @ddg_path = @working_dir + 'ddg.csv'

  OptionParser.new do |opts|
    opts.banner = "Must specify update or insert (or both)"

    opts.on('-u', '--update', 'Tell scraper to fetch fresh data for existing Salesforce records') { @options[:update] = true }
    opts.on('-i', '--insert', 'Tell scraper to fetch data for new connections not yet in Salesforce') { @options[:insert] = true }
    opts.on('-n', '--noproxy', 'Do not use any proxies') { @options[:noproxy] = true }
    opts.on('-e', '--history', 'Map Contact IDs to education/employment histories for new connections') { map_history_ids }

    opts.on('-r', '--restart', 'Restart the project from beginning with the same inputs. WARNING: This will delete all scraped data.') do
      unless confirm_restart(true)
        puts "exiting"
        exit
      end
      restart_project
      puts "project files deleted"
    end

    opts.on('-h', '--help', 'Displays Help') do
      puts opts
      exit
    end
  end.parse!

  # Stop at the first required column missing from the SF reference sheet.
  sf_headers = get_headers(@sf_path)
  absent = ['LIN ID', 'Email', 'Contact ID'].find { |name| !sf_headers.include?(name) }
  if absent
    puts "The SF reference sheet must include the #{absent} field."
    exit
  end

  if File.exist?(@ddg_path)
    # Resuming: the insert/update flags are derived from the crossref sheet,
    # overriding whatever was given on the command line.
    contact_ids = CSV.foreach(@crossref_path, headers: true).map { |row| row['Contact ID'] }
    @options[:insert] = contact_ids.include?(nil) || contact_ids.include?("")
    @options[:update] = contact_ids.any? { |id| id && id.length > 0 }
    puts "\nResuming previous scraping. insert: #{@options[:insert]}, update: #{@options[:update]}, using proxies? #{!@options[:noproxy]}"
  elsif @options[:update] || @options[:insert]
    puts "\nStarting new project. insert: #{@options[:insert]}, update: #{@options[:update]}, using proxies? #{!@options[:noproxy]}"
  else
    puts "Must specify insert or update. See help for details with -h"
    exit
  end

  # merge and crossref are skipped when ddg.csv already exists.
  merge unless File.exist?(@ddg_path)
  crossref unless File.exist?(@ddg_path)
  duck
  lin
end

Instance Method Details

#confirm_restart(first = true) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/linsc.rb', line 53

# Asks the user, on standard input, to confirm a project restart.
#
# Keeps prompting until the answer is "y" or "n" (case-insensitive,
# surrounding whitespace ignored).
#
# @param first [Boolean] true to print the full warning as the first prompt,
#   false to start with the "Unknown input" re-prompt
# @return [Boolean] true when the user answers "y"; false when the user
#   answers "n" or standard input reaches end-of-file
def confirm_restart(first=true)
  loop do
    if first
      puts "Are you sure you want to restart the project? This will delete all data except the original inputs.\n(y/n)"
    else
      puts "Unknown input. Please enter (y/n)"
    end

    # Read from $stdin explicitly: a bare Kernel#gets goes through ARGF, which
    # treats any arguments still left in ARGV as file names to read from.
    input = $stdin.gets
    # nil means end-of-file: nobody can answer, so decline instead of crashing.
    return false if input.nil?

    case input.strip.downcase
    when 'y' then return true
    when 'n' then return false
    end

    first = false
  end
end

#crossref ⇒ Object



25
26
27
28
# File 'lib/linsc.rb', line 25

# Instantiates CrossRef with the merged sheet as the child, the Salesforce
# reference sheet as the master, and the crossref path as the output.
#
# @return [CrossRef] the newly created instance
def crossref
  settings = {
    input_dir: @working_dir,
    child_path: @merge_path,
    master_path: @sf_path,
    output_path: @crossref_path,
    options: @options
  }
  CrossRef.new(**settings)
end

#duck ⇒ Object



30
31
32
# File 'lib/linsc.rb', line 30

# Builds a DuckScraper from the working directory, the crossref path, the ddg
# path and the current options, then calls #find_profiles on it.
#
# @return [Object] whatever DuckScraper#find_profiles returns
def duck
  scraper = DuckScraper.new(@working_dir, @crossref_path, @ddg_path, @options)
  scraper.find_profiles
end

#lin ⇒ Object



34
35
36
# File 'lib/linsc.rb', line 34

# Builds a LinScraper from the working directory, the ddg path and the
# current options, then calls #start on it.
#
# @return [Object] whatever LinScraper#start returns
def lin
  scraper = LinScraper.new(@working_dir, @ddg_path, @options)
  scraper.start
end

#map_history_ids ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/linsc.rb', line 38

# Runs CrossRef once for the employment insert sheet and once for the
# education insert sheet, each against history_ref.csv with 'LIN ID' as the
# lookup field on both sides, then exits the process.
#
# Each run reads contact_<section>_insert.csv and is given
# contact_<section>_insert_with_ids.csv as its output path, all inside the
# working directory.
def map_history_ids
  puts "Mapping ids to history"
  %w[employment education].each do |section|
    CrossRef.new(
      input_dir: @working_dir,
      child_path: @working_dir + "contact_#{section}_insert.csv",
      master_path: @working_dir + "history_ref.csv",
      output_path: @working_dir + "contact_#{section}_insert_with_ids.csv",
      options: { :noproxy => false, :update => true, :insert => false },
      master_lookup_field: 'LIN ID',
      child_lookup_field: 'LIN ID',
      master_secondary_lookups: nil,
      static_values: nil
    )
  end
  exit
end

#merge ⇒ Object



18
19
20
21
22
23
# File 'lib/linsc.rb', line 18

# Builds a Merger from the working directory, the merge path and a
# header-name map, then calls #merge on it.
#
# @return [Object] whatever Merger#merge returns
def merge
  header_map = {
    'FirstName' => 'First Name',
    'LastName' => 'Last Name',
    'EmailAddress' => 'Email',
    'Company' => 'Employer Organization Name 1',
    'Position' => 'Employer 1 Title',
    'Recruiter' => 'LIN 1st Degree'
  }
  merger = Merger.new(@working_dir, @merge_path, header_map)
  merger.merge
end

#restart_project ⇒ Object



68
69
70
71
72
73
74
75
76
# File 'lib/linsc.rb', line 68

# Deletes the generated project files that currently exist: the merge,
# crossref and ddg sheets plus the contact / employment / education update
# and insert sheets in the working directory. Files that are absent are
# skipped.
#
# @return [Array] the full list of paths that were considered for deletion
def restart_project
  output_names = %w[
    contact_update.csv
    contact_insert.csv
    contact_employment_update.csv
    contact_employment_insert.csv
    contact_education_update.csv
    contact_education_insert.csv
  ]
  targets = [@merge_path, @crossref_path, @ddg_path] +
            output_names.map { |name| @working_dir + name }
  targets.each do |path|
    next unless File.exist?(path)
    File.delete(path)
  end
end