15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
|
# File 'lib/weworkremotely/scraper.rb', line 15
# Fetches the RSS job feed for a category and rebuilds @@results from it.
#
# Prints a progress line, downloads and parses the feed, then builds one
# WeWorkRemotely::Job per <item> element. @@results is reset to an empty
# array on every call before the jobs are appended.
#
# @param category [String] one of "programming", "design", "devops",
#   "marketing", "copywriting", "customer support", "business" or "other".
#   Any other value has no feed URL; nil is handed to the parser in that
#   case (presumably yielding an empty document and no jobs -- confirm
#   against the Nokogiri version in use).
# @return the value of `each` over the matched item nodes; callers should
#   read the scraped jobs from @@results rather than rely on this.
def scrape_category(category)
  feed_urls = {
    "programming" => "https://weworkremotely.com/categories/2-programming/jobs.rss",
    "design" => "https://weworkremotely.com/categories/1-design/jobs.rss",
    "devops" => "https://weworkremotely.com/categories/6-devops-sysadmin/jobs.rss",
    "marketing" => "https://weworkremotely.com/categories/9-marketing/jobs.rss",
    "copywriting" => "https://weworkremotely.com/categories/5-copywriting/jobs.rss",
    "customer support" => "https://weworkremotely.com/categories/7-customer-support/jobs.rss",
    "business" => "https://weworkremotely.com/categories/3-business-exec-management/jobs.rss",
    "other" => "https://weworkremotely.com/categories/4-remote/jobs.rss"
  }
  feed_url = feed_urls[category]

  puts "Scraping for #{category} jobs..."

  # URI.open replaces the bare Kernel#open call, which no longer accepts
  # URLs on Ruby 3.0+. The block form closes the handle once parsing is done.
  doc = if feed_url
          URI.open(feed_url) { |feed| Nokogiri::XML(feed) }
        else
          Nokogiri::XML(nil)
        end

  # One sanitizer is enough for the whole feed.
  sanitizer = Rails::Html::FullSanitizer.new

  @@results = []
  doc.xpath("//item").each do |list_item|
    # Item titles look like "Company: Job name". The limit of 2 keeps any
    # further ": " inside the job name instead of cutting the name short.
    company, name = list_item.css('title').text.split(": ", 2)

    # The returned object was never used by this method; the call is kept for
    # whatever effect it has on the category store.
    # NOTE(review): Job receives the category *name* below, not this object --
    # confirm that is intended.
    WeWorkRemotely::Category.find_or_create_by_name(category)

    # The id is the leading token of the last path segment of the guid URL.
    guid = list_item.css('guid').text
    id = guid.split("/").last&.split("-")&.first

    sanitized_description = sanitizer.sanitize(list_item.css('description').text)

    @@results << WeWorkRemotely::Job.new(
      id: id,
      company: company,
      name: name,
      description: sanitized_description,
      published_date: list_item.css('pubDate').text,
      url: guid,
      category: category
    )
  end
end
|