Class: WebScrapingHw3::Scrape

Inherits:
Object
  • Object
show all
Defined in:
lib/web_scraping_hw3.rb

Overview

Class to Scrape the set website

Class Method Summary collapse

Class Method Details

.find_companies_page(url) ⇒ Object



36
37
38
39
# File 'lib/web_scraping_hw3.rb', line 36

# Fetches +url+ and collects the links found inside the centered,
# capital-letter container of that page.
#
# @param url [String] address of the page to fetch via make_parsed
# @return [Object] the result of the chained +css+ lookups: every +a+
#   element beneath the matching +div+
def self.find_companies_page(url)
  make_parsed(url)
    .css("div.col-xs-12.padding-top-10.text-center.capital-letter")
    .css("a")
end

.find_companies_table(companies_page) ⇒ Object



41
42
43
44
45
# File 'lib/web_scraping_hw3.rb', line 41

# Follows one link of the companies index and collects the links held in
# the company table of the page it points to.
#
# @param companies_page [Object] link element; its +href+ attribute value is
#   appended to BASE_URL to build the address to fetch
# @return [Object] the result of the chained +css+ lookups: every +a+
#   element inside the matching +table+
def self.find_companies_table(companies_page)
  relative_path = companies_page.attributes["href"].value
  listing_page = make_parsed("#{BASE_URL}#{relative_path}")

  company_table = listing_page.css("table.table-profile.table-hover.table-set-border-yellow")
  company_table.css("a")
end

.find_company_highlight(company_tag_a) ⇒ Object



56
57
58
59
60
61
# File 'lib/web_scraping_hw3.rb', line 56

# Opens a company's profile page and reads the target of the second link
# in its tab navigation (presumably the "highlights" tab, going by the
# method name).
#
# @param company_tag_a [Object] link element; its +href+ attribute value is
#   appended to BASE_URL to build the profile page address
# @return [String] the +href+ value of the tab link at index 1
def self.find_company_highlight(company_tag_a)
  profile_path = company_tag_a.attributes["href"].value
  profile_page = make_parsed("#{BASE_URL}#{profile_path}")

  tab_links = profile_page.css("ul.nav.nav-tabs.set-nav-tabs").css("a")
  # Index 1 is the second tab of the navigation bar.
  selected_tab = tab_links[1]
  selected_tab.attributes["href"].value
end

.main_scrape ⇒ nil

Main function: scrapes the website and prints each corporation's name and asset cost.

Returns:

  • (nil)


21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/web_scraping_hw3.rb', line 21

# Entry point of the scraper: walks every companies index page, every
# company linked from it, and prints one line per company via print_asset.
#
# @return [nil]
def self.main_scrape
  index_url = "#{BASE_URL}/set/commonslookup.do"

  find_companies_page(index_url).each do |companies_page|
    find_companies_table(companies_page).each do |company_tag_a|
      highlight_path = find_company_highlight(company_tag_a)
      print_asset("#{BASE_URL}#{highlight_path}")
    end
  end

  # `each` evaluates to the collection it iterated; return nil explicitly so
  # the method honours its documented return value.
  nil
end

.make_parsed(url) ⇒ HTML

Converts a URL string into a parsed Nokogiri HTML document that can be used with any Nokogiri tool.

Parameters:

  • url (String)

Returns:

  • (HTML)


51
52
53
54
# File 'lib/web_scraping_hw3.rb', line 51

# Downloads +url+ with HTTParty and hands the response body to Nokogiri.
#
# @param url [String] address to request
# @return [Object] whatever Nokogiri::HTML builds from the response body
def self.make_parsed(url)
  Nokogiri::HTML(HTTParty.get(url).body)
end


63
64
65
66
67
68
69
70
71
72
# File 'lib/web_scraping_hw3.rb', line 63

# Fetches a stock page and writes "<name> : <cost>" to standard output.
#
# The name is the text of the +h3+ inside the page's heading column; the
# cost is the text of the second-to-last cell in the third row of the info
# table (presumably the asset figure — confirm against the live page).
#
# @param stock_url [String] address of the stock page to fetch
# @return [nil] the result of +puts+
def self.print_asset(stock_url)
  page = make_parsed(stock_url)

  company_name = page.css("div.col-xs-12.col-md-12.col-lg-8").css("h3").text
  # Row index 2 is the third <tr>; cell index -2 is the second-to-last <td>.
  third_row = page.css("table.table.table-hover.table-info").css("tr")[2]
  asset_cell = third_row.css("td")[-2]

  puts("#{company_name} : #{asset_cell.text}")
end