Module: Gbbib::TScrapper

Extended by:
Scrapper
Defined in:
lib/gbbib/t_scrapper.rb

Overview

Social standard scraper.

Class Method Summary

Methods included from Scrapper

get_docid, get_status, get_titles, get_type, scrapped_data

Class Method Details

.scrape_doc(pid) ⇒ Gbbib::GbBibliographicItem



38
39
40
41
42
# File 'lib/gbbib/t_scrapper.rb', line 38

# Downloads one document page and builds a bibliographic item from it.
#
# @param pid [String] path of the document page; it is appended verbatim to
#   "http://www.ttbz.org.cn" to form the source URL
# @return [GbBibliographicItem] item built from the data scraped off the page
def scrape_doc(pid)
  src = "http://www.ttbz.org.cn#{pid}"
  # Block form: the downloaded stream is closed as soon as parsing is done,
  # instead of staying open until it is garbage-collected.
  doc = OpenURI.open_uri(src) do |page|
    Nokogiri::HTML(page, nil, Encoding::UTF_8.to_s)
  end
  GbBibliographicItem.new scrapped_data(doc, src: src)
end

.scrape_page(text) ⇒ Gbbib::HitCollection

rubocop:disable Metrics/MethodLength, Metrics/AbcSize



20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/gbbib/t_scrapper.rb', line 20

# Runs a search on www.ttbz.org.cn and collects the matching documents.
#
# @param text [String] search text; every "-" is replaced with an em dash
#   (U+2014) before the query is URL-escaped
# @return [HitCollection] one Hit per link found in the result table
def scrape_page(text)
  key = CGI.escape(text.tr('-', "\u2014"))
  url = "http://www.ttbz.org.cn/Home/Standard?searchType=2&key=#{key}"
  # Block form: the response stream is closed as soon as parsing is done,
  # instead of staying open until it is garbage-collected.
  header = OpenURI.open_uri(url) { |page| Nokogiri::HTML(page) }
  xpath = '//table[contains(@class, "standard_list_table")]/tr/td/a'
  t_xpath = '../preceding-sibling::td[3]'
  hits = header.xpath(xpath).map do |h|
    # The pattern is the three characters an em dash's UTF-8 bytes (E2 80 94)
    # turn into when decoded one byte per character; presumably the page is
    # parsed with the wrong encoding here. They are normalised to "-".
    title = h.at(t_xpath).text.gsub(/â\u0080\u0094/, '-')
    # Drop a trailing slash from the link so it can serve as the pid
    Hit.new pid: h[:href].sub(%r{/$}, ''), title: title, scrapper: self
  end
  HitCollection.new hits
end