Class: SpiderHtml

Inherits:
Object
  • Object
show all
Defined in:
lib/spider_html.rb,
lib/spider_html/version.rb

Constant Summary collapse

VERSION =
"0.1.9"

Class Method Summary collapse

Class Method Details

.phantom_file(url, file_name, opt = {}) ⇒ Object

SpiderHtml.phantom_file("www.baidu.com", "baidu.html"); SpiderHtml.phantom_file("www.baidu.com", "baidu.html", image_dir: "#{Dir.pwd}/image", html_dir: "#{Dir.pwd}/html"). 默认读取项目中的 config/constants/spider_html.yml(不存在时使用 gem 自带的 spider_html.yml);opt 可以传入 image_dir、html_dir、logger。



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/spider_html.rb', line 29

# Renders +url+ with the external +phantomjs+ binary (driving the
# +phantom.js+ script that sits next to this file) and has it write the
# result to +file_name+ under the configured output directory.
#
# Directory defaults are read from
# "#{Dir.pwd}/config/constants/spider_html.yml" when that file exists,
# otherwise from the +spider_html.yml+ located next to this file.
#
# @param url [String] page to render
# @param file_name [String] output file name; names containing ".png" are
#   placed under the image directory, everything else under the html directory
# @param opt [Hash] optional overrides
# @option opt [String] :image_dir overrides the configured "image_dir"
# @option opt [String] :html_dir overrides the configured "html_dir"
# @option opt [Object] :logger handed to log_info / log_error
# @return [Object, nil] nil when phantomjs succeeded, otherwise whatever
#   log_error returns
def self.phantom_file(url, file_name, opt={})
  spider_html_path = "#{Dir.pwd}/config/constants/spider_html.yml"
  bundled_path = File.join(File.dirname(__FILE__), "spider_html.yml")
  # NOTE(review): YAML.load_file is fine for a trusted local config file;
  # switch to a safe loader if this path can ever be user-controlled.
  spider = YAML.load_file(File.exist?(spider_html_path) ? spider_html_path : bundled_path)

  image_dir = opt[:image_dir].nil? ? spider["image_dir"] : opt[:image_dir]
  html_dir = opt[:html_dir].nil? ? spider["html_dir"] : opt[:html_dir]
  js_path = File.join(File.dirname(__FILE__), "phantom.js")
  logger = opt[:logger]

  target_dir = file_name.include?(".png") ? image_dir : html_dir
  path = "#{target_dir}/#{file_name}"
  FileUtils.mkdir_p(File.dirname(path))

  # The joined string is kept for logging only.
  order = "phantomjs #{js_path} #{url} #{path}"
  self.log_info(logger, "system:#{order}")

  # Pass the arguments individually so no shell is involved: URLs containing
  # "&", "?", ";" or spaces reach phantomjs intact and cannot inject commands.
  result = system("phantomjs", js_path, url.to_s, path)
  self.log_error(logger, "phantomjs error:#{order}") unless result
end

.request_http(url, opt = {}) ⇒ Object

SpiderHtml.request_http("https://www.baidu.com"); SpiderHtml.request_http("https://www.baidu.com", method: "post"). opt 可以传入 method,默认使用 GET 方法;返回值为 {body: body, code: code}。



12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/spider_html.rb', line 12

# Performs a single HTTP(S) request and returns the response body and status.
#
# @param url [String, URI] target address; when it carries no "scheme://"
#   prefix (e.g. "www.baidu.com" or "127.0.0.1:3000/path"), "http://" is assumed
# @param opt [Hash] optional settings
# @option opt [String, Symbol] :method "post" (any case, String or Symbol)
#   issues a POST; anything else, including nil, issues a GET
# @return [Hash] {body: String, code: String}; code is the status as
#   reported by Net::HTTP (a String such as "200")
def self.request_http(url, opt={})
  address = url.to_s
  # Without a scheme URI() yields a non-HTTP URI (or fails to parse
  # "host:port"), which Net::HTTP rejects, so default to plain HTTP.
  address = "http://#{address}" unless address =~ %r{\A[a-z][a-z0-9+.\-]*://}i
  uri = URI(address)

  request_class = opt[:method].to_s.downcase == "post" ? Net::HTTP::Post : Net::HTTP::Get
  req = request_class.new(uri)

  # NOTE(review): VERIFY_NONE disables TLS certificate verification. It is
  # kept so existing callers hitting hosts with invalid certificates keep
  # working, but it leaves HTTPS requests open to interception.
  res = Net::HTTP.start(uri.hostname, uri.port,
                        :use_ssl => uri.scheme == 'https',
                        :ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE) do |http|
    http.request(req)
  end

  {body: res.body, code: res.code}
end