Class: AnimalInfo::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/scraper.rb

Class Method Summary collapse

Class Method Details

.get_html(name) ⇒ Object



30
31
32
33
# File 'lib/scraper.rb', line 30

# Downloads and parses the English Wikipedia article for +name+.
#
# The page is fetched with URI.open rather than a bare +open+: since Ruby 3.0
# Kernel#open no longer handles URLs (even with open-uri loaded) and would
# try to open a local file named after the URL instead.
#
# @param name [String] article title exactly as it appears in the URL path
#   (it is interpolated into the URL without any escaping)
# @return [Object] the document produced by Nokogiri::HTML for the fetched page
def self.get_html(name)
  url = "https://en.wikipedia.org/wiki/#{name}"
  Nokogiri::HTML(URI.open(url))
end

.scrape_from_wikipedia(name) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/scraper.rb', line 2

# Builds a hash of taxonomy facts for an animal from its Wikipedia article.
#
# The document returned by get_html(name) is searched for the page heading and
# for two-cell rows of the "table.infobox.biota" table. Rows whose first cell
# (with colons removed) is Kingdom, Phylum, Class or Order are recorded.
#
# @param name [String] article title, interpolated into the article URL
# @return [Hash{Symbol => String}] always contains :name and :url; may also
#   contain :kingdom, :phylum, :klass and :order when matching rows exist
def self.scrape_from_wikipedia(name)
  document = get_html(name)
  wanted_labels = ["Kingdom", "Phylum", "Class", "Order"]
  result = { name: document.search("h1#firstHeading").text }

  document.search("table.infobox.biota tr").each do |row|
    cells = row.search("td")
    next unless cells.size == 2

    label = cells.first.text.strip.delete(":")
    next unless wanted_labels.include?(label)

    # Prefer the bold text inside the value cell; fall back to the whole cell.
    bold_nodes = cells.last.search("b")
    value_source = bold_nodes.empty? ? cells.last : bold_nodes

    # "Class" is stored under :klass rather than :class.
    key_name = label == "Class" ? "Klass" : label
    result[key_name.downcase.to_sym] = value_source.text.strip
  end

  result[:url] = "https://en.wikipedia.org/wiki/#{name}"
  result
end