Class: MyUniversalJobsMatch

Inherits:
Object
  • Object
show all
Defined in:
lib/myuniversaljobsmatch.rb

Instance Method Summary collapse

Constructor Details

#initialize(filepath: '') ⇒ MyUniversalJobsMatch

Returns a new instance of MyUniversalJobsMatch.



24
25
26
27
28
29
30
31
32
33
# File 'lib/myuniversaljobsmatch.rb', line 24

# Sets up a scraper instance.
#
# Stores the given file path, the base URL of the Find a job site, and an
# empty Dynarex document whose schema describes one job item.
#
# @param filepath [String] stored in @filepath; none of the methods shown
#   alongside this one read it
def initialize(filepath: '')
  item_fields = %w(job_id title description posting_date company location
                   industries job_type)

  @filepath = filepath
  @url_base = 'https://findajob.dwp.gov.uk/'
  @dx = Dynarex.new("ujm[title,tags]/item(#{item_fields.join(', ')})")
end

Instance Method Details

#dynarex ⇒ Object



35
36
37
# File 'lib/myuniversaljobsmatch.rb', line 35

# Returns the object held in @dx (the Dynarex document built by the
# constructor).
def dynarex
  @dx
end

#query(id) ⇒ Object



115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/myuniversaljobsmatch.rb', line 115

# Fetches the details page for a single vacancy and returns its fields.
#
# Each row of the first table on the page becomes one entry: the <th> text is
# lower-cased, right-stripped, has its last character removed (presumably the
# label's trailing colon -- confirm against the live markup) and its spaces
# replaced by underscores to form the key; the <td> text is the value.
# The description block is reduced to plain text with <br/> turned into
# newlines.
#
# @param id [String, Integer] job id appended to "<url_base>details/"
# @return [Hash{Symbol=>Object}] :title (the page's <title>) followed by the
#   table fields and :description; :posting_date and :closing_date are
#   converted with #to_date when the page supplies them
def query(id)
  # to_s lets callers pass a numeric id; String#+ would otherwise raise TypeError.
  doc = Nokorexi.new(@url_base + 'details/' + id.to_s).to_doc

  title = doc.root.text('head/title')

  h = doc.root.xpath('//table[1]/tbody/tr').map do |tr|
    label = tr.text('th').downcase.rstrip[0..-2].gsub(/ +/, '_')
    [label.to_sym, tr.text('td').to_s]
  end.to_h

  markup = doc.root.element('//div[@itemprop="description"]').xml
  h[:description] = markup.gsub(/<br *\/> */, "\n")
                          .gsub(/<\/?[^\>]+\/?>/, '').strip

  # String#to_date is not core Ruby -- presumably supplied by one of the
  # file's dependencies. Only convert dates the page actually listed, so a
  # missing row no longer ends in NoMethodError on nil.
  %i(posting_date closing_date).each do |key|
    h[key] = h[key].to_date if h[key]
  end

  { title: title }.merge(h)
end

#search(title: '', where: '', results_per_page: nil, sort_by: nil, hours: nil, contract_type: nil) ⇒ Object

options:

results_per_page: 10, 25, 50
sort_by: date, highest_salary, lowest_salary
hours: any, full_time, part_time
contract_type: any, permanent, temporary, contract, apprenticeship


45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/myuniversaljobsmatch.rb', line 45

# Runs a search on Find a job and returns the results as a Dynarex document.
#
# @param title [String] search keywords, sent as the +q+ parameter
# @param where [String] location, sent as the +w+ parameter
# @param results_per_page [Integer, nil] 10, 25 or 50 (sent as +pp+)
# @param sort_by [String, Symbol, nil] date, highest_salary or lowest_salary;
#   any other value leaves the sort parameters unset
# @param hours [String, Symbol, nil] any, full_time or part_time (sent as +cti+)
# @param contract_type [String, Symbol, nil] any, permanent, temporary,
#   contract or apprenticeship (sent as +cty+)
# @return [Dynarex] a +vacancies+ document with one +vacancy+ record per
#   div.search-result found on the page
def search(title: '', where: '', results_per_page: nil, sort_by: nil,
           hours: nil, contract_type: nil)
  # Required here because the file's own require list is not visible from
  # this method; the call is a no-op once the library is loaded.
  require 'uri'

  params = { adv: 1, q: title, w: where }
  params[:pp] = results_per_page if results_per_page
  params[:cty] = contract_type if contract_type
  params[:cti] = hours if hours

  # sort_by option => [sb, sd] query values
  sort_params = {
    'date'           => %i(date down),
    'highest_salary' => %i(salary down),
    'lowest_salary'  => %i(salary up)
  }[sort_by.to_s]
  params[:sb], params[:sd] = sort_params if sort_params

  # encode_www_form escapes spaces, '&', '=' etc. so multi-word keywords and
  # locations produce a well-formed query string.
  url = @url_base + 'search/?' + URI.encode_www_form(params)
  doc = Nokorexi.new(url).to_doc

  rows = doc.root.xpath('//div[@class="search-result"]')

  a = rows.map do |row|
    items = row.xpath('ul/li')
    joburl = row.element('h3/a/@href').to_s
    jobtitle = row.element('h3/a/text()')
    jobid = joburl[/\d+$/]
    # NOTE(review): this takes leading digits from the *search keywords*, not
    # from the listing; kept as-is -- confirm whether jobtitle was intended.
    jobref = title[/^\d+/].to_s

    date = items[0].text.to_date
    company = items[1].text('strong')
    location = items[1].element('span/text()')
    salary = items[2].text('strong') if items[2] # third <li> is optional
    desc = row.text('p').strip

    [jobid, jobref, date, jobtitle, joburl, company, location, salary, desc]
  end

  dx = Dynarex.new('vacancies[title, desc, date, time, tags, xslt]/' \
           'vacancy(job_id, job_ref, date, title, url, company, location, salary, desc)')

  now = Time.now
  dx.title = "Find a job - Search results for '#{title}'"
  dx.desc = "generated from web scrape of Find a job." \
            "findajob.dwp.gov.uk/; source: " + url
  # compact drops the nil first keyword of a blank title, which previously
  # raised TypeError on String + nil.
  dx.tags = ['jobs', 'vacancies', 'jobmatch', title.split.first].compact.join(' ')
  dx.date = now.strftime("%Y-%b-%d")
  dx.time = now.strftime("%H:%M")

  fields = %i(job_id job_ref date title url company location salary desc)
  a.each { |row| dx.create fields.zip(row).to_h }

  dx
end