Class: Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/deedveloper/scraper.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Scraper

Returns a new instance of Scraper.



7
8
9
10
11
12
13
14
# File 'lib/deedveloper/scraper.rb', line 7

# Prompts the user for the search criteria (job, location, radius, salary)
# and loads the matching Indeed results page into @doc.
#
# The answers are form-encoded before being placed in the query string.
# Interpolating them raw produced an invalid URL as soon as an answer
# contained a space or a reserved character (e.g. "software engineer",
# "new york, ny" or "$65,000"), which made the fetch raise
# URI::InvalidURIError.
#
# @return [Scraper] a new instance of Scraper
def initialize
    # performs first round of gets/scraping operations upon initialization
    get_user_job
    get_user_location
    get_user_radius
    get_user_salary

    # Pairs are listed in the same order as the original hand-written query;
    # empty strings render as "key=" exactly like before.
    query = URI.encode_www_form([
        ["as_and", user_job],
        ["jt", "all"],
        ["st", ""],
        ["sr", ""],
        ["as_src", ""],
        ["salary", user_salary],
        ["radius", user_radius],
        ["l", user_location],
        ["fromage", "any"],
        ["limit", "50"],
        ["sort", ""],
        ["psf", "advsrch"]
    ])
    @doc = Nokogiri::HTML(open("http://www.indeed.com/jobs?#{query}", :allow_redirections => :all))
end

Instance Attribute Details

#detail_doc ⇒ Object

Returns the value of attribute detail_doc.



5
6
7
# File 'lib/deedveloper/scraper.rb', line 5

# Reader for the detail_doc attribute.
#
# @return [Object] the current value of @detail_doc (nil when unset)
def detail_doc; @detail_doc; end

#doc ⇒ Object

Returns the value of attribute doc.



5
6
7
# File 'lib/deedveloper/scraper.rb', line 5

# Reader for the doc attribute.
#
# @return [Object] the current value of @doc (nil when unset)
def doc; @doc; end

#user_job ⇒ Object

Returns the value of attribute user_job.



5
6
7
# File 'lib/deedveloper/scraper.rb', line 5

# Reader for the user_job attribute.
#
# @return [Object] the current value of @user_job (nil when unset)
def user_job; @user_job; end

#user_location ⇒ Object

Returns the value of attribute user_location.



5
6
7
# File 'lib/deedveloper/scraper.rb', line 5

# Reader for the user_location attribute.
#
# @return [Object] the current value of @user_location (nil when unset)
def user_location; @user_location; end

#user_radius ⇒ Object

Returns the value of attribute user_radius.



5
6
7
# File 'lib/deedveloper/scraper.rb', line 5

# Reader for the user_radius attribute.
#
# @return [Object] the current value of @user_radius (nil when unset)
def user_radius; @user_radius; end

#user_salary ⇒ Object

Returns the value of attribute user_salary.



5
6
7
# File 'lib/deedveloper/scraper.rb', line 5

# Reader for the user_salary attribute.
#
# @return [Object] the current value of @user_salary (nil when unset)
def user_salary; @user_salary; end

Class Method Details

.scrape_detail(input) ⇒ Object



54
55
56
57
58
59
60
61
62
63
# File 'lib/deedveloper/scraper.rb', line 54

# Fetches the detail page of one previously listed job and stores its salary
# text on that job, falling back to a placeholder when the page has none.
#
# The position is validated before use. Previously "0" or non-numeric input
# became index -1 and silently selected the LAST job, and a number past the
# end of the list raised NoMethodError on nil.
#
# Note: this is a class method, so @detail_doc is stored on the class object
# itself, not on any Scraper instance.
#
# @param input [#to_i] 1-based position of the job within Job.all
# @return [String, nil] the placeholder string when no salary was found,
#   otherwise nil (also nil when input does not select a job); read
#   the job's salary attribute for the actual result
def self.scrape_detail(input)
    position = input.to_i
    # to_i yields 0 for non-numeric input; anything below 1 is not a valid choice.
    return nil if position < 1

    target_job = Job.all[position - 1]
    return nil if target_job.nil?

    @detail_doc = Nokogiri::HTML(open(target_job.job_url, :allow_redirections => :all))
    # TODO: format target_job.description so that it's more readable.
    # target_job.description = @detail_doc.search("div.jobsearch-JobComponent-description").text
    target_job.salary = @detail_doc.search("div.jobsearch-JobMetadataHeader").text.strip
    target_job.salary = "No salary info available" if target_job.salary.empty?
end

Instance Method Details

#get_user_job ⇒ Object



16
17
18
19
# File 'lib/deedveloper/scraper.rb', line 16

# Asks which job to search for and stores the answer, trimmed and
# lower-cased, in @user_job.
#
# Reads from $stdin explicitly: bare Kernel#gets would read from a file named
# on the command line when ARGV is non-empty. End of input (gets returning
# nil) is treated as a blank answer instead of raising NoMethodError.
#
# @return [String] the normalized answer ("" when no input is available)
def get_user_job
    puts "What sort of job would you like to search for?"
    @user_job = $stdin.gets.to_s.strip.downcase
end

#get_user_location ⇒ Object



21
22
23
24
# File 'lib/deedveloper/scraper.rb', line 21

# Asks which location to search in and stores the answer, trimmed and
# lower-cased, in @user_location.
#
# Reads from $stdin explicitly: bare Kernel#gets would read from a file named
# on the command line when ARGV is non-empty. End of input (gets returning
# nil) is treated as a blank answer instead of raising NoMethodError.
#
# @return [String] the normalized answer ("" when no input is available)
def get_user_location
    puts "What location would you like to search for?"
    @user_location = $stdin.gets.to_s.strip.downcase
end

#get_user_radius ⇒ Object



32
33
34
35
# File 'lib/deedveloper/scraper.rb', line 32

# Asks for the search radius in miles and stores the trimmed answer in
# @user_radius (kept as a String; no numeric validation is performed).
#
# Reads from $stdin explicitly: bare Kernel#gets would read from a file named
# on the command line when ARGV is non-empty. End of input (gets returning
# nil) is treated as a blank answer instead of raising NoMethodError.
#
# @return [String] the trimmed answer ("" when no input is available)
def get_user_radius
    puts "What is your desired search radius in miles?"
    @user_radius = $stdin.gets.to_s.strip
end

#get_user_salary ⇒ Object



26
27
28
29
30
# File 'lib/deedveloper/scraper.rb', line 26

# Asks for the desired yearly salary (or range) and stores the trimmed answer
# in @user_salary. A blank answer is allowed and stored as "".
#
# Reads from $stdin explicitly: bare Kernel#gets would read from a file named
# on the command line when ARGV is non-empty. End of input (gets returning
# nil) is treated as a blank answer instead of raising NoMethodError.
#
# @return [String] the trimmed answer ("" when blank or no input is available)
def get_user_salary
    #TODO: build logic that recognizes invalid input and reprompts user; invalid or irregular input can affect search results
    puts "What is the yearly salary or salary range that you're looking for? Examples: $65,000, 65-85k, 65k, or 65000.","(Optional: leave blank to continue.)"
    @user_salary = $stdin.gets.to_s.strip
end

#scrape_jobs ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/deedveloper/scraper.rb', line 37

# Builds one Job per result card found in the search results page (doc),
# filling in title, company, location, posting date and detail-page URL.
#
# Location and posting date each have a fallback selector that is tried when
# the primary one yields no text. A card without a title link (or whose link
# has no href) no longer aborts the whole scrape with NoMethodError/TypeError;
# its job_url is simply left unset.
#
# @return [Object] whatever `each` returns for the matched card collection
def scrape_jobs
    doc.css('div.jobsearch-SerpJobCard').each do |job_card|
        j = Job.new
        j.title = job_card.search("a.jobtitle").text.strip
        j.company = job_card.search("span.company").text.strip

        j.location = job_card.search("span.location").text.strip
        j.location = job_card.search("div.location").text.strip if j.location.empty?

        j.when_posted = job_card.search("span.date").text.strip
        j.when_posted = job_card.search("span.sponsoredGray").text.strip if j.when_posted.empty?

        # `first` is nil when the card has no title link, and the link may lack an href.
        link = job_card.search("div.title a").first
        href = link && link["href"]
        j.job_url = "https://indeed.com" + href if href
    end
end