Class: MetaInspector::Document

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Includes:
Exceptionable
Defined in:
lib/meta_inspector/document.rb

Overview

A MetaInspector::Document knows about its URL and its contents

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(initial_url, options = {}) ⇒ Document

Initializes a new instance of MetaInspector::Document, setting the URL. Options:

  • connection_timeout: defaults to 20 seconds

  • read_timeout: defaults to 20 seconds

  • retries: defaults to 3 times

  • html_content_only: whether an exception should be raised when the content-type is not text/html. Defaults to false.

  • allow_redirections: when true, follow HTTP redirects. Defaults to true

  • document: the html of the url as a string

  • warn_level: what to do when encountering exceptions. Can be :warn, :raise or nil

  • headers: object containing custom headers for the request

  • normalize_url: whether the URL should be normalized. Defaults to true



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/meta_inspector/document.rb', line 21

# Builds a document for +initial_url+.
#
# Caller-supplied +options+ are layered over the hash returned by +defaults+,
# the individual settings are stored in instance variables, and the URL,
# request and parser collaborators are created from them.
#
# @param initial_url the URL this document represents
# @param options [Hash] overrides for the default settings; keys read here:
#   :connection_timeout, :read_timeout, :retries, :html_content_only,
#   :allow_redirections, :document, :download_images, :headers,
#   :warn_level, :exception_log, :normalize_url
def initialize(initial_url, options = {})
  settings = defaults.merge(options)

  @connection_timeout = settings[:connection_timeout]
  @read_timeout = settings[:read_timeout]
  @retries = settings[:retries]
  @html_content_only = settings[:html_content_only]
  @allow_redirections = settings[:allow_redirections]
  @document = settings[:document]
  @download_images = settings[:download_images]
  @headers = settings[:headers]
  @warn_level = settings[:warn_level]

  # A caller-provided log wins; otherwise a fresh one is created with this document's warn level.
  @exception_log = settings[:exception_log] ||
                   MetaInspector::ExceptionLog.new(warn_level: warn_level)
  @normalize_url = settings[:normalize_url]

  @url = MetaInspector::URL.new(
    initial_url,
    exception_log: @exception_log,
    normalize: @normalize_url
  )

  # No request object is built when the document contents were passed in directly.
  unless @document
    @request = MetaInspector::Request.new(
      @url,
      allow_redirections: @allow_redirections,
      connection_timeout: @connection_timeout,
      read_timeout: @read_timeout,
      retries: @retries,
      exception_log: @exception_log,
      headers: @headers
    )
  end

  @parser = MetaInspector::Parser.new(
    self,
    exception_log: @exception_log,
    download_images: @download_images
  )
end

Instance Attribute Details

#allow_redirections ⇒ Object (readonly)

Returns the value of attribute allow_redirections.



4
5
6
# File 'lib/meta_inspector/document.rb', line 4

# Reader for @allow_redirections, which #initialize sets from options[:allow_redirections].
def allow_redirections
  @allow_redirections
end

#headers ⇒ Object (readonly)

Returns the value of attribute headers.



4
5
6
# File 'lib/meta_inspector/document.rb', line 4

# Reader for @headers, which #initialize sets from options[:headers].
def headers
  @headers
end

#html_content_only ⇒ Object (readonly)

Returns the value of attribute html_content_only.



4
5
6
# File 'lib/meta_inspector/document.rb', line 4

# Reader for @html_content_only, which #initialize sets from options[:html_content_only].
def html_content_only
  @html_content_only
end

#warn_level ⇒ Object (readonly)

Returns the value of attribute warn_level.



4
5
6
# File 'lib/meta_inspector/document.rb', line 4

# Reader for @warn_level, which #initialize sets from options[:warn_level].
# #initialize also passes this value to the ExceptionLog it creates when none is supplied.
def warn_level
  @warn_level
end

Instance Method Details

#to_hash ⇒ Object

Returns all document data as a nested Hash



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/meta_inspector/document.rb', line 57

# Collects the document's data into a nested, string-keyed Hash.
#
# Top-level keys, in order: url, scheme, host, root_url, title, best_title,
# description, links, images, charset, feed, content_type, meta_tags,
# favicon and response. The 'response' entry is itself a Hash holding
# 'status' and 'headers'.
#
# @return [Hash] the document data
def to_hash
  summary = {
    'url' => url,
    'scheme' => scheme,
    'host' => host,
    'root_url' => root_url,
    'title' => title,
    'best_title' => best_title,
    'description' => description,
    'links' => links.to_hash,
    'images' => images.to_a,
    'charset' => charset,
    'feed' => feed,
    'content_type' => content_type,
    'meta_tags' => meta_tags,
    'favicon' => images.favicon
  }

  # Attached last so the response lookups happen after every other field.
  summary['response'] = {
    'status' => response.status,
    'headers' => response.headers
  }

  summary
end

#to_s ⇒ Object

Returns the contents of the document as a string



79
80
81
# File 'lib/meta_inspector/document.rb', line 79

# String conversion hands back the result of #document unchanged.
# NOTE(review): #document is defined outside this excerpt; presumably it yields the HTML as a String — confirm.
def to_s
  document
end