Class: SpiderHtml
- Inherits:
-
Object
- Object
- SpiderHtml
- Defined in:
- lib/spider_html.rb,
lib/spider_html/version.rb
Constant Summary collapse
- VERSION =
"0.1.9"
Class Method Summary collapse
-
.phantom_file(url, file_name, opt = {}) ⇒ Object
示例:SpiderHtml.phantom_file("www.baidu.com", "baidu.html");SpiderHtml.phantom_file("www.baidu.com", "baidu.html", image_dir: "#{Dir.pwd}/image", html_dir: "#{Dir.pwd}/html")。默认读取项目里的 config/constants/spider_html.yml;也可以通过 opt 传入 image_dir、html_dir、logger。
-
.request_http(url, opt = {}) ⇒ Object
示例:SpiderHtml.request_http("http://www.baidu.com");SpiderHtml.request_http("http://www.baidu.com", method: "post")。opt 传入 method,默认是 get 方法。返回 {body: body, code: code}。
Class Method Details
.phantom_file(url, file_name, opt = {}) ⇒ Object
示例:SpiderHtml.phantom_file("www.baidu.com", "baidu.html");SpiderHtml.phantom_file("www.baidu.com", "baidu.html", image_dir: "#{Dir.pwd}/image", html_dir: "#{Dir.pwd}/html")。默认读取项目里的 config/constants/spider_html.yml;也可以通过 opt 传入 image_dir、html_dir、logger。
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/spider_html.rb', line 29
#
# Runs PhantomJS (via the bundled phantom.js script) against +url+ and lets it
# write its output to a file below the image or HTML directory.
#
# Directory defaults come from "#{Dir.pwd}/config/constants/spider_html.yml"
# when that file exists, otherwise from the spider_html.yml that sits next to
# this source file. Both can be overridden per call through +opt+.
#
# @param url [String] page address handed to phantom.js
# @param file_name [String] output file name; a name containing ".png" is
#   placed under the image directory, anything else under the HTML directory
# @param opt [Hash] optional overrides
#   - :image_dir  directory for ".png" output (default: "image_dir" from the YAML)
#   - :html_dir   directory for all other output (default: "html_dir" from the YAML)
#   - :logger     passed through to log_info / log_error
# @return [nil, Object] nil when phantomjs succeeded, otherwise whatever
#   log_error returns (log_info / log_error are defined elsewhere in this file)
def self.phantom_file(url, file_name, opt = {})
  project_config = "#{Dir.pwd}/config/constants/spider_html.yml"
  config_path =
    if File.exist?(project_config)
      project_config
    else
      File.join(File.dirname(__FILE__), "spider_html.yml")
    end
  spider = YAML.load_file(config_path)

  # An explicit nil (or missing key) falls back to the configured directory.
  image_dir = opt[:image_dir].nil? ? spider["image_dir"] : opt[:image_dir]
  html_dir = opt[:html_dir].nil? ? spider["html_dir"] : opt[:html_dir]
  js_path = File.join(File.dirname(__FILE__), "phantom.js")
  logger = opt[:logger]

  target_dir = file_name.include?(".png") ? image_dir : html_dir
  path = "#{target_dir}/#{file_name}"
  FileUtils.mkdir_p(File.dirname(path))

  # Human-readable form of the command, used for logging only.
  order = "phantomjs #{js_path} #{url} #{path}"
  self.log_info(logger, "system:#{order}")

  # Pass the arguments as a list so no shell is involved: URLs with query
  # strings ("?a=1&b=2"), spaces or quotes reach phantomjs verbatim and cannot
  # be interpreted as extra shell commands.
  result = system("phantomjs", js_path, url.to_s, path)
  self.log_error(logger, "phantomjs error:#{order}") unless result
end
.request_http(url, opt = {}) ⇒ Object
示例:SpiderHtml.request_http("http://www.baidu.com");SpiderHtml.request_http("http://www.baidu.com", method: "post")。opt 传入 method,默认是 get 方法。返回 {body: body, code: code}。
12 13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/spider_html.rb', line 12
#
# Sends one HTTP(S) request to +url+ and returns the response body and status.
#
# @param url [String, URI] target address; when it carries no "scheme://"
#   prefix (e.g. "www.baidu.com"), "http://" is assumed so that a host and
#   port can be derived from it
# @param opt [Hash] optional settings
#   - :method           "post" / :post (case-insensitive) issues a POST;
#                       anything else, including no value, issues a GET
#   - :ssl_verify_mode  OpenSSL verify mode for https requests
#                       (default: OpenSSL::SSL::VERIFY_NONE)
# @return [Hash] {body: <response body>, code: <status code as returned by
#   Net::HTTPResponse#code, e.g. "200">}
def self.request_http(url, opt = {})
  target = url.to_s.strip
  # URI("www.example.com") parses as a path with no host, which Net::HTTP
  # cannot connect to; default to plain http when the scheme is missing.
  target = "http://#{target}" unless target =~ %r{\A[a-z][a-z0-9+.\-]*://}i
  uri = URI(target)

  request_class = opt[:method].to_s.downcase == "post" ? Net::HTTP::Post : Net::HTTP::Get
  req = request_class.new(uri)

  # NOTE(review): certificate verification is disabled by default, as in the
  # original implementation. Pass ssl_verify_mode: OpenSSL::SSL::VERIFY_PEER
  # when talking to hosts whose identity matters.
  verify_mode = opt.fetch(:ssl_verify_mode, OpenSSL::SSL::VERIFY_NONE)

  res = Net::HTTP.start(uri.hostname, uri.port,
                        :use_ssl => uri.scheme == 'https',
                        :ssl_verify_mode => verify_mode) do |http|
    http.request(req)
  end

  { body: res.body, code: res.code }
end