Class: IS22Plus

Inherits:
IndeedScraper2022 show all
Defined in:
lib/indeed_scraper2022.rb

Instance Attribute Summary

Attributes inherited from IndeedScraper2022

#browser

Instance Method Summary collapse

Methods inherited from IndeedScraper2022

#page, #results, #search

Constructor Details

#initialize(q: '', location: '', headless: true, cookies: nil, debug: false) ⇒ IS22Plus

Returns a new instance of IS22Plus.



263
264
265
266
# File 'lib/indeed_scraper2022.rb', line 263

# Builds an IS22Plus by handing every keyword argument straight to the
# superclass constructor (IndeedScraper2022#initialize).
#
# The meaning of each option is defined by the superclass; this method only
# forwards them. +debug+ is forwarded as given, so callers can switch the
# debug output on or off.
#
# @param q [String] forwarded unchanged to the superclass
# @param location [String] forwarded unchanged to the superclass
# @param headless [Boolean] forwarded unchanged to the superclass
# @param cookies [Object, nil] forwarded unchanged to the superclass
# @param debug [Boolean] forwarded unchanged to the superclass
def initialize(q: '', location: '', headless: true, cookies: nil, debug: false)
  super(q: q, location: location, headless: headless, cookies: cookies,
        debug: debug)
end

Instance Method Details

#archive(filepath = '/tmp/indeed') ⇒ Object

Note: The most efficient way to accumulate vacancy articles is to execute archive() daily.


271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
# File 'lib/indeed_scraper2022.rb', line 271

# Archives every search result that has not been saved before.
#
# For each result the full vacancy page (obtained from #page) is written to
# "j<id>.txt" inside +filepath+, and a summary record is added to
# "index.yml" in the same directory. The id is the value of the "jk" query
# parameter of the result's revealed link. Results whose id is already in the
# index are skipped, as are results whose link carries no "jk" parameter.
# Runs #search first when no results have been fetched yet.
#
# @param filepath [String] directory holding the index and the article
#   files; created when missing
# @return [Integer, nil] number of bytes written to the index file, or nil
#   when there are no results to archive
def archive(filepath='/tmp/indeed')

  search() if @results.nil?

  return unless @results

  FileUtils.mkdir_p filepath

  idxfile = File.join(filepath, 'index.yml')

  # File.exist? is used because the File.exists? alias was removed in
  # Ruby 3.2. An empty index file parses to a falsy value, hence the
  # fallback to a fresh hash.
  index = (YAML.load(File.read(idxfile)) if File.exist?(idxfile)) || {}

  @results.each_with_index do |item, i|

    puts "saving #{item[:title]}" if @debug
    puts "link: #{item[:link].inspect}" if @debug
    url = URL.reveal(item[:link])
    item[:link] = url
    puts "url: #{url.inspect}" if @debug

    # without a jk parameter there is no key to file the vacancy under
    #
    id = url.to_s[/(?<=jk=)[^&]+/]

    unless id
      puts 'skipping, no jk id found in url' if @debug
      next
    end

    key = id.to_sym

    # the vacancy record has previously been saved
    #
    next if index[key]

    # write the full page vacancy article to file
    # (page numbers are 1-based, hence i + 1)
    #
    File.write File.join(filepath, "j#{id}.txt"), page(i + 1)

    # add the vacancy snippet to the index file
    #
    index[key] = {
      link: url[/^[^&]+/],
      title: item[:title].to_s,
      salary: item[:salary].to_s,
      company: item[:company].to_s.strip,
      location: item[:location].to_s,
      jobsnippet: item[:jobsnippet].to_s,
      date: item[:date],
      added: Time.now.strftime("%Y-%m-%d")
    }

  end

  # save the vacancy index file
  #
  File.write idxfile, index.to_yaml

end

#list ⇒ Object



332
333
334
335
336
337
338
# File 'lib/indeed_scraper2022.rb', line 332

# Renders the current results as a numbered list of titles.
#
# Each line holds the 1-based position, right-aligned to two characters,
# followed by a full stop, a space and the result's :title value.
#
# @return [String] one line per result, separated by newlines
def list

  lines = []

  @results.each_with_index do |entry, position|
    lines << format('%2d. %s', position + 1, entry[:title])
  end

  lines.join("\n")

end