Class: Linsc
Instance Method Summary collapse
- #confirm_restart(first = true) ⇒ Object
- #crossref ⇒ Object
- #duck ⇒ Object
- #initialize ⇒ Linsc (constructor): a new instance of Linsc.
- #lin ⇒ Object
- #map_history_ids ⇒ Object
- #merge ⇒ Object
- #restart_project ⇒ Object
Methods included from CSVHandlers
#append_to_csv, #create_file, #create_file_with_headers, #create_row, #get_headers
Constructor Details
#initialize ⇒ Linsc
Returns a new instance of Linsc.
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
# File 'lib/linsc.rb', line 78
# Runs the whole command-line workflow from the constructor.
#
# Sets the default options and the working-directory file paths, parses the
# command-line switches, checks the Salesforce reference sheet headers, and
# then either resumes a previous run (when ddg.csv already exists) or starts
# a new one (merge + crossref), before running the duck and lin steps.
#
# May terminate the process via `exit` (help, declined restart, missing
# required header, or neither insert nor update requested on a new project).
def initialize
  @options = { :noproxy => false, :update => false, :insert => false }
  @working_dir = Pathname.pwd
  @merge_path = @working_dir + 'merged.csv'
  @sf_path = @working_dir + 'sf_ref.csv'
  @crossref_path = @working_dir + 'crossref.csv'
  @ddg_path = @working_dir + 'ddg.csv'

  parse_cli_options
  require_sf_fields

  # Checked after option parsing on purpose: --restart deletes ddg.csv, which
  # turns the rest of this run into a fresh project.
  if File.exist?(@ddg_path)
    infer_resume_modes
    puts "\nResuming previous scraping. insert: #{@options[:insert]}, update: #{@options[:update]}, using proxies? #{!@options[:noproxy]}"
  else
    unless @options[:update] || @options[:insert]
      puts "Must specify insert or update. \nSee help for details with -h"
      exit
    end
    puts "\nStarting new project. insert: #{@options[:insert]}, update: #{@options[:update]}, using proxies? #{!@options[:noproxy]}"
    merge
    crossref
  end

  duck
  lin
end

# Parses ARGV (destructively, via parse!) and records the switches in @options.
# The --history and --restart switches run their action immediately from
# inside the parser callback.
def parse_cli_options
  OptionParser.new do |opts|
    opts.banner = "Must specify update or insert (or both)"

    opts.on('-u', '--update', 'Tell scraper to fetch fresh data for existing Salesforce records') do
      @options[:update] = true
    end
    opts.on('-i', '--insert', 'Tell scraper to fetch data for new connections not yet in Salesforce') do
      @options[:insert] = true
    end
    opts.on('-n', '--noproxy', 'Do not use any proxies') do
      @options[:noproxy] = true
    end
    opts.on('-e', '--history', 'Map Contact IDs to education/employment histories for new connections') do
      map_history_ids
    end
    opts.on('-r', '--restart', 'Restart the project from beginning with the same inputs. WARNING: This will delete all scraped data.') do
      if confirm_restart(true)
        restart_project
        puts "project files deleted"
      else
        puts "exiting"
        exit
      end
    end
    opts.on('-h', '--help', 'Displays Help') do
      puts opts
      exit
    end
  end.parse!
end
private :parse_cli_options

# Exits with a message unless the Salesforce reference sheet has every
# required column. get_headers is listed under CSVHandlers in this page's
# summary; presumably it returns the sheet's header names - verify.
def require_sf_fields
  sf_headers = get_headers(@sf_path)
  ['LIN ID', 'Email', 'Contact ID'].each do |field|
    next if sf_headers.include?(field)

    puts "The SF reference sheet must include the #{field} field."
    exit
  end
end
private :require_sf_fields

# On resume, overrides the insert/update switches from crossref.csv:
# any row with a blank Contact ID turns insert on, any row with a non-blank
# Contact ID turns update on.
# NOTE(review): raises if ddg.csv exists but crossref.csv does not - confirm
# whether that state is reachable.
def infer_resume_modes
  ids = []
  CSV.foreach(@crossref_path, headers: true) { |row| ids << row['Contact ID'] }

  @options[:insert] = ids.any? { |id| id.nil? || id.empty? }
  @options[:update] = ids.any? { |id| id && !id.empty? }
end
private :infer_resume_modes
Instance Method Details
#confirm_restart(first = true) ⇒ Object
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/linsc.rb', line 53
# Asks on standard input whether the project should really be restarted.
#
# Keeps prompting until the answer is "y" or "n" (case-insensitive,
# surrounding whitespace ignored).
#
# @param first [Boolean] when true the full warning is printed first; when
#   false the "Unknown input" re-prompt is printed instead.
# @return [Boolean] true for "y"; false for "n" or when input ends (EOF).
def confirm_restart(first=true)
  prompt = if first
             "Are you sure you want to restart the project? This will delete all data except the original inputs.\n(y/n)"
           else
             "Unknown input. Please enter (y/n)"
           end

  loop do
    puts prompt
    # $stdin.gets rather than bare gets: Kernel#gets reads from ARGF, which
    # treats any remaining command-line arguments as file names.
    answer = $stdin.gets
    # EOF: nothing more can be typed, so take the non-destructive choice.
    return false if answer.nil?

    case answer.strip.downcase
    when 'y' then return true
    when 'n' then return false
    end
    prompt = "Unknown input. Please enter (y/n)"
  end
end
#crossref ⇒ Object
25 26 27 28 |
# File 'lib/linsc.rb', line 25
# Builds a CrossRef over the merged sheet (child) and the Salesforce
# reference sheet (master), with crossref.csv as the output path, and
# returns the new CrossRef object.
def crossref
  settings = {
    input_dir: @working_dir,
    child_path: @merge_path,
    master_path: @sf_path,
    output_path: @crossref_path,
    options: @options
  }
  CrossRef.new(**settings)
end
#duck ⇒ Object
30 31 32 |
# File 'lib/linsc.rb', line 30
# Creates a DuckScraper for the working directory, crossref.csv and ddg.csv
# paths plus the current options, and returns the result of find_profiles.
def duck
  scraper = DuckScraper.new(@working_dir, @crossref_path, @ddg_path, @options)
  scraper.find_profiles
end
#lin ⇒ Object
34 35 36 |
# File 'lib/linsc.rb', line 34
# Creates a LinScraper for the working directory and ddg.csv path plus the
# current options, and returns the result of start.
def lin
  scraper = LinScraper.new(@working_dir, @ddg_path, @options)
  scraper.start
end
#map_history_ids ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/linsc.rb', line 38
# Builds one CrossRef per history sheet (employment first, then education),
# each looking rows up against history_ref.csv by 'LIN ID' and targeting a
# "*_with_ids.csv" output path, then terminates the process with exit.
def map_history_ids
  puts "Mapping ids to history"

  sheets = [
    ["contact_employment_insert.csv", "contact_employment_insert_with_ids.csv"],
    ["contact_education_insert.csv", "contact_education_insert_with_ids.csv"]
  ]

  sheets.each do |source_name, target_name|
    CrossRef.new(
      input_dir: @working_dir,
      child_path: @working_dir + source_name,
      master_path: @working_dir + "history_ref.csv",
      output_path: @working_dir + target_name,
      # A fresh options hash per call, as in the two original calls.
      options: { :noproxy => false, :update => true, :insert => false },
      master_lookup_field: 'LIN ID',
      child_lookup_field: 'LIN ID',
      master_secondary_lookups: nil,
      static_values: nil
    )
  end

  exit
end
#merge ⇒ Object
18 19 20 21 22 23 |
# File 'lib/linsc.rb', line 18
# Hands the working directory, the merged.csv path and a column-name map to
# a Merger and returns the result of its merge call.
def merge
  column_names = {
    'FirstName'    => 'First Name',
    'LastName'     => 'Last Name',
    'EmailAddress' => 'Email',
    'Company'      => 'Employer Organization Name 1',
    'Position'     => 'Employer 1 Title',
    'Recruiter'    => 'LIN 1st Degree'
  }
  merger = Merger.new(@working_dir, @merge_path, column_names)
  merger.merge
end
#restart_project ⇒ Object
68 69 70 71 72 73 74 75 76 |
# File 'lib/linsc.rb', line 68
# Deletes every file this tool generates in the working directory so the
# project can start over; files that do not exist are skipped.
#
# The list includes the two "*_with_ids.csv" files written by
# map_history_ids, so ID mappings from a previous run do not survive a
# restart. Input sheets such as sf_ref.csv and history_ref.csv are not in
# the list.
def restart_project
  generated_names = [
    "contact_update.csv",
    "contact_insert.csv",
    "contact_employment_update.csv",
    "contact_employment_insert.csv",
    "contact_employment_insert_with_ids.csv",
    "contact_education_update.csv",
    "contact_education_insert.csv",
    "contact_education_insert_with_ids.csv"
  ]
  files = [@merge_path, @crossref_path, @ddg_path] +
          generated_names.map { |name| @working_dir + name }

  files.each do |f|
    File.delete(f) if File.exist?(f)
  end
end