Class: ClearanceJobsComParser

Inherits:
Object
  • Object
show all
Includes:
FailureHandler
Defined in:
lib/clearancejobscom/clearance_jobs_com_parser.rb

Instance Method Summary collapse

Methods included from FailureHandler

#get_retry

Constructor Details

#initialize(url, page, requests = nil) ⇒ ClearanceJobsComParser

Returns a new instance of ClearanceJobsComParser.



9
10
11
12
13
14
15
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 9

# Build a parser for a single fetched page.
#
# @param url [Object] address of the page; stored and later handed to
#   +get_retry+ and included in the parsed result
# @param page [String] raw HTML of the page
# @param requests [Object, nil] optional value stored and later handed to
#   +get_retry+ when a parse attempt fails
def initialize(url, page, requests = nil)
  # Where the page came from and how to re-fetch it.
  @url = url
  @requests = requests

  # Number of retries performed so far by #parse.
  @i = 0

  # Keep both the raw markup and its parsed document.
  @html = page
  @page = Nokogiri::HTML.parse(@html)
end

Instance Method Details

#company_name ⇒ Object

Get the company name



50
51
52
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 50

# Get the company name.
#
# @return [String] text of the +h2+ element(s) in the parsed page
def company_name
  headings = @page.css("h2")
  headings.text
end

#employment_status ⇒ Object

Get the employment status for the position



107
108
109
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 107

# Get the employment status for the position.
#
# @return [String, nil] whatever +get_element_value+ yields for the
#   "Status:" label
def employment_status
  label = "Status:"
  get_element_value(label)
end

#get_element(phrase) ⇒ Object

Get the element including the phrase specified



134
135
136
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 134

# Find the job-data elements whose text mentions a phrase.
#
# @param phrase [String] text to look for
# @return [Array] every +.cj-job-data+ element whose text includes +phrase+
#   (empty when none match)
def get_element(phrase)
  data_nodes = @page.css(".cj-job-data")
  data_nodes.select do |node|
    node.text.include?(phrase)
  end
end

#get_element_value(phrase) ⇒ Object

Get the value for the element



128
129
130
131
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 128

# Get the value shown for a labelled job-data element.
#
# @param phrase [String] label text passed on to +get_element+
# @return [String, nil] text of the +strong+ tag(s) inside the first matching
#   element, or nil when nothing matches
def get_element_value(phrase)
  match = get_element(phrase)[0]
  return unless match

  match.css("strong").text
end

#group_id ⇒ Object

Get the group ID



97
98
99
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 97

# Get the group ID.
#
# @return [String, nil] whatever +get_element_value+ yields for the
#   "Group ID" label
def group_id
  label = "Group ID"
  get_element_value(label)
end

#job_category ⇒ Object

Get the job category



92
93
94
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 92

# Get the job category.
#
# @return [String, nil] whatever +get_element_value+ yields for the
#   "Job Category:" label
def job_category
  label = "Job Category:"
  get_element_value(label)
end

#job_description ⇒ Object

Get the job description



66
67
68
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 66

# Get the job description as HTML.
#
# Picks the first +div.margin-bottom-20+ whose +itemprop+ attribute is
# "description". When no such element exists the call on nil raises
# NoMethodError; this is left as-is because #parse rescues errors raised
# while building its result.
#
# @return [String] HTML of the description element
def job_description
  candidates = @page.css("div.margin-bottom-20")
  description = candidates.find { |div| div['itemprop'] == "description" }
  description.to_html
end

#job_description_plaintext ⇒ Object

Get the job description as plain text (without HTML markup)



71
72
73
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 71

# Get the job description as plain text.
#
# The three line-break spellings below are turned into newlines first, then
# the markup is re-parsed and reduced to its text.
#
# @return [String] text content of the description
def job_description_plaintext
  with_newlines = ['<br />', '<br>', '<br/>'].reduce(job_description) do |markup, line_break|
    markup.gsub(line_break, "\n")
  end
  Nokogiri::HTML.parse(with_newlines).text
end

#job_title ⇒ Object

Get the job title



61
62
63
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 61

# Get the job title.
#
# @return [String] text of the +h1+ element(s) in the parsed page
def job_title
  headings = @page.css("h1")
  headings.text
end

#location ⇒ Object

Get the job location



55
56
57
58
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 55

# Get the job location.
#
# Reads the +h3+ text inside the first +div+ whose +itemprop+ is
# "hiringOrganization", removes every digit and trims surrounding
# whitespace. When no such +div+ exists the call on nil raises
# NoMethodError; this is left as-is because #parse rescues errors raised
# while building its result.
#
# @return [String, nil] cleaned location text (nil only if the heading text
#   itself is nil)
def location
  organization = @page.css("div").find { |div| div['itemprop'] == "hiringOrganization" }
  heading_text = organization.css("h3").text
  return unless heading_text

  without_digits = heading_text.gsub(/(\d)/, "")
  without_digits.strip
end

#parse ⇒ Object

Parse the job posting into a hash of its fields



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 18

# Parse the page into a hash of job fields.
#
# If building the hash raises a StandardError, the retry counter +@i+ is
# incremented, the page is fetched again through +get_retry+ (listed under
# FailureHandler), re-parsed, and the whole parse is attempted again. Once
# the counter reaches 10 no further attempt is made.
#
# @return [Hash, nil] the extracted fields plus the raw HTML, or nil when the
#   retries are used up
def parse
  {
    url: @url,
    company_name: company_name,
    location: location,
    job_title: job_title,
    job_description: job_description,
    job_description_plaintext: job_description_plaintext,
    required_travel: required_travel,
    salary: salary,
    salary_notes: salary_notes,
    job_category: job_category,
    group_id: group_id,
    required_experience: required_experience,
    employment_status: employment_status,
    required_clearance: required_clearance,
    work_environment: work_environment,
    posting_date: posting_date,
    html: @html
  }
rescue StandardError
  @i += 1
  # Give up (returning nil) after the retry limit.
  return unless @i < 10

  # Re-fetch, swap in the fresh document, and try again.
  @html = get_retry(@url, @requests, @i)
  @page = Nokogiri::HTML.parse(@html)
  parse
end

#posting_date ⇒ Object

Get the date of the posting



122
123
124
125
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 122

# Get the date of the posting.
#
# Reads the +content+ attribute of the first +meta+ tag inside the first
# element matching "Post Date:" and parses it.
#
# @return [DateTime] the parsed posting date
def posting_date
  meta_tag = get_element("Post Date:").first.css("meta").first
  DateTime.parse(meta_tag['content'])
end

#required_clearance ⇒ Object

Get the clearance level



112
113
114
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 112

# Get the clearance level.
#
# @return [String, nil] whatever +get_element_value+ yields for the
#   "Minimum Clearance" label
def required_clearance
  label = "Minimum Clearance"
  get_element_value(label)
end

#required_experience ⇒ Object

Get the # of years of experience required



102
103
104
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 102

# Get the number of years of experience required.
#
# @return [String, nil] whatever +get_element_value+ yields for the
#   "Minimum Experience Required" label
def required_experience
  label = "Minimum Experience Required"
  get_element_value(label)
end

#required_travel ⇒ Object

Get whether travel is required



76
77
78
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 76

# Get the travel requirement.
#
# @return [String, nil] whatever +get_element_value+ yields for the
#   "Travel:" label
def required_travel
  label = "Travel:"
  get_element_value(label)
end

#salary ⇒ Object

Get the salary



81
82
83
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 81

# Get the salary.
#
# @return [String, nil] whatever +get_element_value+ yields for the
#   "Compensation:" label
def salary
  label = "Compensation:"
  get_element_value(label)
end

#salary_notes ⇒ Object

Get notes about the salary



86
87
88
89
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 86

# Get notes about the salary.
#
# @return [String, nil] the "Compensation Comments:" value with surrounding
#   whitespace removed, or nil when there is no such value
def salary_notes
  comments = get_element_value("Compensation Comments:")
  return unless comments

  # A single strip trims both ends.
  comments.strip
end

#work_environment ⇒ Object

Get the work environment



117
118
119
# File 'lib/clearancejobscom/clearance_jobs_com_parser.rb', line 117

# Get the work environment.
#
# @return [String, nil] whatever +get_element_value+ yields for the
#   "Workplace:" label
def work_environment
  label = "Workplace:"
  get_element_value(label)
end