Class: Crawler

Inherits:
Object
  • Object
show all
Defined in:
lib/wpcrawler.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, type) ⇒ Crawler

Returns a new instance of Crawler.



8
9
10
11
# File 'lib/wpcrawler.rb', line 8

# Builds a crawler bound to one site and one REST collection.
#
# @param url [Object] kept in @url; #scrape interpolates it right after
#   "https://", so a bare host name is what fits there
# @param type [Object] kept in @type; #scrape uses it as the path segment
#   that follows "/wp-json/wp/v2/"
def initialize(url, type)
  @url, @type = url, type
end

Instance Attribute Details

#url ⇒ Object

Returns the value of attribute url.



6
7
8
# File 'lib/wpcrawler.rb', line 6

# Reader for the @url instance variable (the value the constructor stored).
#
# @return [Object] the current value of @url
def url
  @url
end

Instance Method Details

#scrape ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/wpcrawler.rb', line 13

# Downloads up to eight pages (100 entries per page) of the REST collection
# "https://<@url>/wp-json/wp/v2/<@type>/" and writes one pipe-separated row
# per entry to "wpoutputfile.csv" (relative to the current working
# directory, truncating any existing file). A spinner runs while the
# download is in progress.
#
# Paging stops at the first response that is not a JSON array. Iterating
# such a response used to raise a TypeError that was hidden by a
# `rescue` modifier, which also hid every other StandardError.
#
# Entries missing a field get an empty column instead of aborting the rest
# of the page.
#
# Timeouts, connection failures and non-JSON responses are reported on
# stdout rather than raised; the spinner is stopped and the file is closed
# in every case.
#
# @return [void]
def scrape
  spinner = TTY::Spinner.new("[:spinner]", format: :dots)
  spinner.auto_spin # Automatic animation with default interval

  # Block form guarantees the handle is closed even if a request raises.
  File.open("wpoutputfile.csv", "w+") do |output|
    output.puts "Date | Title | Author | Link | Status"

    (1..8).each do |page|
      endpoint = "https://#{@url}/wp-json/wp/v2/#{@type}/?page=#{page}&per_page=100"
      entries = JSON.parse(HTTP.timeout(5).get(endpoint).to_s)

      # Anything other than an array is not a page of entries; later pages
      # are not requested.
      break unless entries.is_a?(Array)

      entries.each do |entry|
        next unless entry.is_a?(Hash)

        title = entry['title']
        title = title['rendered'] if title.is_a?(Hash)
        columns = [entry['date'], title, entry['author'], entry['link'], entry['status']]
        output.puts columns.map(&:to_s).join('| ')
      end
    end
  end

  spinner.stop('Done!') # Stop animation
rescue HTTP::TimeoutError, HTTP::ConnectionError, JSON::ParserError
  spinner.stop # do not leave the animation running behind the message
  puts "Connection ERROR - make sure your website is a wordpress site with an open api"
end