Class: MetaInspector::Document

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/meta_inspector/document.rb

Overview

A MetaInspector::Document knows about its URL and its contents

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(initial_url, options = {}) ⇒ Document

Initializes a new instance of MetaInspector::Document, setting the URL. Options:

  • connection_timeout: defaults to 20 seconds

  • read_timeout: defaults to 20 seconds

  • retries: defaults to 3 times

  • allow_redirections: when true, follow HTTP redirects. Defaults to true

  • document: the html of the url as a string

  • headers: object containing custom headers for the request

  • normalize_url: true by default

  • faraday_options: an optional hash of options to pass to Faraday on the request



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/meta_inspector/document.rb', line 16

# Builds a document for +initial_url+.
#
# Caller-supplied options are merged over +defaults+ (defined outside this
# listing), copied into instance variables, and then used to construct the
# URL, request and parser collaborators, in that order.
#
# @param initial_url [Object] handed unchanged to MetaInspector::URL.new
# @param options [Hash] overrides merged on top of +defaults+
def initialize(initial_url, options = {})
  settings = defaults.merge(options)

  # Connection tuning.
  @connection_timeout = settings[:connection_timeout]
  @read_timeout       = settings[:read_timeout]
  @retries            = settings[:retries]
  @encoding           = settings[:encoding]

  # Content policy.
  @allow_redirections     = settings[:allow_redirections]
  @allow_non_html_content = settings[:allow_non_html_content]

  # Remaining settings, including an optional pre-supplied document.
  @document           = settings[:document]
  @download_images    = settings[:download_images]
  @headers            = settings[:headers]
  @normalize_url      = settings[:normalize_url]
  @faraday_options    = settings[:faraday_options]
  @faraday_http_cache = settings[:faraday_http_cache]

  @url = MetaInspector::URL.new(initial_url, normalize: @normalize_url)

  # A request object is only built when no document was supplied;
  # otherwise @request is left unset.
  unless @document
    @request = MetaInspector::Request.new(
      @url,
      allow_redirections: @allow_redirections,
      connection_timeout: @connection_timeout,
      read_timeout:       @read_timeout,
      retries:            @retries,
      encoding:           @encoding,
      headers:            @headers,
      faraday_options:    @faraday_options,
      faraday_http_cache: @faraday_http_cache
    )
  end

  @parser = MetaInspector::Parser.new(self, download_images: @download_images)
end

Instance Attribute Details

#allow_non_html_content ⇒ Object (readonly)

Returns the value of attribute allow_non_html_content.



4
5
6
# File 'lib/meta_inspector/document.rb', line 4

# Reader for @allow_non_html_content, which the constructor fills from
# options[:allow_non_html_content].
#
# @return [Object] the stored value, unchanged
def allow_non_html_content
  @allow_non_html_content
end

#allow_redirections ⇒ Object (readonly)

Returns the value of attribute allow_redirections.



4
5
6
# File 'lib/meta_inspector/document.rb', line 4

# Reader for @allow_redirections, which the constructor fills from
# options[:allow_redirections] and also forwards to the request object.
#
# @return [Object] the stored value, unchanged
def allow_redirections
  @allow_redirections
end

#headers ⇒ Object (readonly)

Returns the value of attribute headers.



4
5
6
# File 'lib/meta_inspector/document.rb', line 4

# Reader for @headers, which the constructor fills from options[:headers]
# and also forwards to the request object.
#
# @return [Object] the stored value, unchanged
def headers
  @headers
end

Instance Method Details

#to_hash ⇒ Object

Returns all document data as a nested Hash



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/meta_inspector/document.rb', line 57

# Collects the document's data into one Hash with String keys.
#
# The sections below are built top to bottom and merged in the same order,
# so the resulting key order is: location, text metadata, headings,
# page assets, then the nested 'response' entry.
#
# @return [Hash] String keys mapped to the values of the same-named readers
def to_hash
  location = {
    'url'      => url,
    'scheme'   => scheme,
    'host'     => host,
    'root_url' => root_url
  }

  text_metadata = {
    'title'            => title,
    'best_title'       => best_title,
    'author'           => author,
    'best_author'      => best_author,
    'description'      => description,
    'best_description' => best_description
  }

  headings = {
    'h1' => h1,
    'h2' => h2,
    'h3' => h3,
    'h4' => h4,
    'h5' => h5,
    'h6' => h6
  }

  page_assets = {
    'links'        => links.to_hash,
    'images'       => images.to_a,
    'charset'      => charset,
    'feed'         => feed,
    'feeds'        => feeds,
    'content_type' => content_type,
    'meta_tags'    => meta_tags,
    'favicon'      => images.favicon
  }

  http_response = {
    'response' => {
      'status'  => response.status,
      'headers' => response.headers
    }
  }

  # The sections share no keys, so merging simply concatenates them in order.
  [location, text_metadata, headings, page_assets, http_response].reduce(:merge)
end

#to_s ⇒ Object

Returns the contents of the document as a string



89
90
91
# File 'lib/meta_inspector/document.rb', line 89

# String form of the document: returns whatever +document+ returns.
# +document+ is defined outside this listing; this page describes the result
# as the contents of the document as a string.
#
# @return [Object] the value of +document+
def to_s
  document
end