Class: Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/lab3.rb

Overview

Rather than printing only Delta’s information, this script looks up www.set.or.th/set/commonslookup.do, loops through all of the listed stocks, and prints each company’s name and assets.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Scraper

Returns a new instance of Scraper.



44
45
46
47
# File 'lib/lab3.rb', line 44

# Downloads the SET common-stock lookup page and stores the parsed document
# in @parse_page for the other instance methods to query.
#
# Performs a blocking HTTP GET every time an instance is constructed.
def initialize
  response = HTTParty.get('https://www.set.or.th/set/commonslookup.do')
  @parse_page ||= Nokogiri::HTML(response.body)
end

Instance Attribute Details

#parse_page ⇒ Object

Returns the value of attribute parse_page.



42
43
44
# File 'lib/lab3.rb', line 42

# Reader for the document held in @parse_page.
#
# @return [Object] the current value of @parse_page (nil if it was never assigned)
def parse_page
  @parse_page
end

Class Method Details

.execute ⇒ Object



87
88
89
90
91
92
93
94
95
96
# File 'lib/lab3.rb', line 87

# Runs the whole scrape: collects the header links, follows them to the
# per-company links, fetches each company's name and assets, and prints one
# "name : assets" line per company to standard output.
#
# The first two header links are skipped.
# NOTE(review): presumably those two entries are not listing pages — confirm
# against the live markup.
#
# @return [Array<Array>] the [name, assets] pairs that were printed
def self.execute
  scraper = Scraper.new

  # `drop(2)` always yields an Array. The previous `[2..-1]` slice returned nil
  # when fewer than two header links were found, which then crashed downstream.
  header_link = scraper.get_header_link.drop(2)
  company_link = scraper.get_company_link(header_link)

  names = scraper.get_names(company_link)
  assets = scraper.get_assets(company_link)

  names.zip(assets).each { |name, asset| puts "#{name} : #{asset}" }
end

Instance Method Details

#get_assets(company_link) ⇒ Object



76
77
78
79
80
81
82
83
84
85
# File 'lib/lab3.rb', line 76

# Fetches the "company highlight" page for every company link and extracts
# one table cell from each.
#
# For each link the highlight URL is built from characters 19...42 of the
# link, the page is downloaded and parsed, and the text of the second-to-last
# <td> in the third <tr> is collected.
# NOTE(review): the fixed 19...42 slice assumes a specific href layout and
# length — verify against the hrefs returned by #get_company_link.
# NOTE(review): presumably that cell holds the company's assets figure — confirm.
#
# @param company_link [Array<String>] company hrefs
# @return [Array<String>] one cell text per link, in input order
def get_assets(company_link)
  company_link.map do |profile_path|
    highlight_url = "https://www.set.or.th/set/companyhighlight" + profile_path[19...42] + "5&language=th&country=TH"
    document = Nokogiri::HTML(HTTParty.get(highlight_url).body)
    third_row = document.css("tr")[2]
    third_row.css("td")[-2].text
  end
end


53
54
55
56
57
58
59
60
61
62
63
# File 'lib/lab3.rb', line 53

# Visits every header link and gathers the hrefs of the anchors found inside
# table cells on each page.
#
# Each link is appended to "https://www.set.or.th/", the page is downloaded
# and parsed, and the href of every `td a[href]` element under a <tr> is
# collected.
#
# @param header_link [Array<String>] hrefs of the pages to visit
# @return [Array<String>] all collected hrefs, in page order then document order
def get_company_link(header_link)
  header_link.flat_map do |path|
    response = HTTParty.get("https://www.set.or.th/" + path)
    anchors = Nokogiri::HTML(response.body).css("tr").css("td a[href]")
    anchors.map { |anchor| anchor['href'] }
  end
end


49
50
51
# File 'lib/lab3.rb', line 49

# Collects the href attribute of every child node of the elements matching
# ".col-xs-12" in the parsed lookup page, skipping children without an href.
#
# @return [Array<String>] the non-nil href values, in document order
def get_header_link
  nodes = parse_page.css(".col-xs-12").children
  nodes.map { |node| node['href'] }.compact
end

#get_names(company_link) ⇒ Object



65
66
67
68
69
70
71
72
73
74
# File 'lib/lab3.rb', line 65

# Downloads each company page and returns the text of its <h3> elements.
#
# Each link is appended to "https://www.set.or.th/" before being fetched.
# NOTE(review): presumably the <h3> text is the company name — confirm
# against the page markup.
#
# @param company_link [Array<String>] company hrefs
# @return [Array<String>] the <h3> text of each page, in input order
def get_names(company_link)
  company_link.map do |path|
    response = HTTParty.get("https://www.set.or.th/" + path)
    Nokogiri::HTML(response.body).css("h3").text
  end
end