Class: Reaxar::Page

Inherits:
Object
  • Object
show all
Defined in:
lib/reaxar/page.rb

Overview

Represents a web page and provides methods for interacting with its content.

Examples:

Open a page and print its title

page = Reaxar::Page.open('https://example.com')
puts page.title

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, client = nil) {|self| ... } ⇒ Page

Initializes a new Page instance.

Parameters:

  • url (String)

    The URL of the page.

  • client (Client, nil) (defaults to: nil)

    Optional HTTP client.

Yields:

  • (self)

    Optional block to yield the page instance.



59
60
61
62
63
64
65
66
67
68
69
# File 'lib/reaxar/page.rb', line 59

# Initializes a new Page instance: registers the cookie and redirect
# middleware on the client, fetches +url+ and parses the response body.
#
# @param url [String] The URL of the page.
# @param client [Client, nil] Optional HTTP client. When omitted, a new client
#   is created with the class-level logger.
# @yield [self] Optional block to yield the page instance.
# @raise [StandardError] Re-raises any error from middleware setup, the request
#   or parsing. A client created by this constructor is closed first.
def initialize(url, client = nil)
  @url = url
  # Only a client created here is ours to clean up; a caller-supplied client
  # stays under the caller's control even when construction fails.
  owns_client = !client
  @client = client || Client.new(logger: self.class.logger)

  begin
    @client.use Reaxar::Middleware::Cookies, jar_file: './cookies.yml'
    @client.use Reaxar::Middleware::Redirect
    @response = @client.get(url)
    @document = Reaxar::Parser::FactoryParser.call(content: @response.read,
                                                   mime_type: @response.headers['content-type'])
  rescue StandardError
    # `new` never returns the instance on failure, so nobody else could close
    # the internally created client.
    @client.close if owns_client
    raise
  end

  yield self if block_given?
end

Class Attribute Details

.logger ⇒ Logger?

Returns the logger instance used by the client.

Returns:

  • (Logger, nil)

    The logger instance used by the client.



28
29
30
# File 'lib/reaxar/page.rb', line 28

# Reader for the logger held in @logger.
# @return [Logger, nil] The stored logger, or nil when none has been assigned.
def logger
  @logger
end

Instance Attribute Details

#client ⇒ Client (readonly)

Returns the HTTP client used to fetch the page.

Returns:

  • (Client)

    The HTTP client used to fetch the page.



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/reaxar/page.rb', line 22

# A fetched web page: the URL it was requested from, the HTTP client and
# response used to fetch it, and the document parsed from the response body.
class Page
  attr_reader :url, :client, :response, :document

  class << self
    # @!attribute [rw] logger
    #   @return [Logger, nil] The logger instance used by the client.
    attr_accessor :logger

    # Configures the logger for the Page class.
    # @param logger [Logger] The logger to use.
    # @return [void]
    def configure(logger:)
      self.logger = logger
    end
  end

  # Opens a page asynchronously.
  # @param url [String] The URL to open.
  # @param client [Client, nil] Optional HTTP client.
  # @yield [page] Optional block to yield the page instance.
  # @yieldparam page [Page] The page instance.
  # @return [Async::Task] The async task wrapping the page.
  def self.open(url, client = nil, &block)
    Async { new(url, client, &block) }
  end

  # Closes the page's HTTP client.
  #
  # @return [void]
  def close
    @client.close
  end

  # Initializes a new Page instance: registers the cookie and redirect
  # middleware on the client, fetches +url+ and parses the response body.
  #
  # @param url [String] The URL of the page.
  # @param client [Client, nil] Optional HTTP client. When omitted, a new
  #   client is created with the class-level logger.
  # @yield [self] Optional block to yield the page instance.
  # @raise [StandardError] Re-raises any error from middleware setup, the
  #   request or parsing. A client created by this constructor is closed first.
  def initialize(url, client = nil)
    @url = url
    # Only a client created here is ours to clean up; a caller-supplied client
    # stays under the caller's control even when construction fails.
    owns_client = !client
    @client = client || Client.new(logger: self.class.logger)

    begin
      @client.use Reaxar::Middleware::Cookies, jar_file: './cookies.yml'
      @client.use Reaxar::Middleware::Redirect
      @response = @client.get(url)
      @document = Reaxar::Parser::FactoryParser.call(content: @response.read,
                                                     mime_type: @response.headers['content-type'])
    rescue StandardError
      # `new` never returns the instance on failure, so nobody else could
      # close the internally created client.
      @client.close if owns_client
      raise
    end

    yield self if block_given?
  end

  # @return [Hash{String => String}, nil]
  #   The response headers as a hash with lowercase keys, or nil if no response is set.
  #   Headers are memoized after the first call.
  def headers
    @headers ||= @response&.headers&.to_h&.transform_keys(&:downcase)
  end

  # @return [String, nil]
  #   The `Content-Type` header value from the response, or nil if not present.
  def content_type
    headers&.[]('content-type')
  end

  # @return [String, Integer]
  #   The `Content-Length` header value from the response.
  #   Falls back to the byte size of the HTML content if the header is missing.
  def content_length
    headers&.[]('content-length') || html.bytesize
  end

  # Returns the title of the page.
  # @return [String, nil] The page title or nil if not found.
  def title
    document.title
  end

  # Returns all links (<a> elements) on the page.
  # The wrapped elements are built once and memoized.
  # @return [Array<Reaxar::Element::A>] The array of link elements.
  def links
    @links ||= document.css('a[href]').map do |link|
      Reaxar::Element::A.new(link, self)
    end
  end

  # Returns the HTML content of the page.
  # @return [String] The HTML content.
  def html
    document.to_html
  end

  # Finds a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # returns nil for every selector.
  # @param selector [String] CSS selector for the form (default: 'form').
  # @return [Object, nil] The form element or nil if not found.
  def form(selector = 'form')
    # Form handling implementation goes here (can be extended).
  end

  # Submits a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # returns nil and submits nothing.
  # @param selector [String] CSS selector for the form.
  # @param data [Hash] Data to submit with the form.
  # @return [Object] The result of the form submission.
  def submit_form(selector, data = {})
    # Form submission implementation goes here.
  end
end

#document ⇒ Nokogiri::HTML::Document (readonly)

Returns the parsed HTML document.

Returns:

  • (Nokogiri::HTML::Document)

    The parsed HTML document.



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/reaxar/page.rb', line 22

# A fetched web page: the URL it was requested from, the HTTP client and
# response used to fetch it, and the document parsed from the response body.
class Page
  attr_reader :url, :client, :response, :document

  class << self
    # @!attribute [rw] logger
    #   @return [Logger, nil] The logger instance used by the client.
    attr_accessor :logger

    # Configures the logger for the Page class.
    # @param logger [Logger] The logger to use.
    # @return [void]
    def configure(logger:)
      self.logger = logger
    end
  end

  # Opens a page asynchronously.
  # @param url [String] The URL to open.
  # @param client [Client, nil] Optional HTTP client.
  # @yield [page] Optional block to yield the page instance.
  # @yieldparam page [Page] The page instance.
  # @return [Async::Task] The async task wrapping the page.
  def self.open(url, client = nil, &block)
    Async { new(url, client, &block) }
  end

  # Closes the page's HTTP client.
  #
  # @return [void]
  def close
    @client.close
  end

  # Initializes a new Page instance: registers the cookie and redirect
  # middleware on the client, fetches +url+ and parses the response body.
  #
  # @param url [String] The URL of the page.
  # @param client [Client, nil] Optional HTTP client. When omitted, a new
  #   client is created with the class-level logger.
  # @yield [self] Optional block to yield the page instance.
  # @raise [StandardError] Re-raises any error from middleware setup, the
  #   request or parsing. A client created by this constructor is closed first.
  def initialize(url, client = nil)
    @url = url
    # Only a client created here is ours to clean up; a caller-supplied client
    # stays under the caller's control even when construction fails.
    owns_client = !client
    @client = client || Client.new(logger: self.class.logger)

    begin
      @client.use Reaxar::Middleware::Cookies, jar_file: './cookies.yml'
      @client.use Reaxar::Middleware::Redirect
      @response = @client.get(url)
      @document = Reaxar::Parser::FactoryParser.call(content: @response.read,
                                                     mime_type: @response.headers['content-type'])
    rescue StandardError
      # `new` never returns the instance on failure, so nobody else could
      # close the internally created client.
      @client.close if owns_client
      raise
    end

    yield self if block_given?
  end

  # @return [Hash{String => String}, nil]
  #   The response headers as a hash with lowercase keys, or nil if no response is set.
  #   Headers are memoized after the first call.
  def headers
    @headers ||= @response&.headers&.to_h&.transform_keys(&:downcase)
  end

  # @return [String, nil]
  #   The `Content-Type` header value from the response, or nil if not present.
  def content_type
    headers&.[]('content-type')
  end

  # @return [String, Integer]
  #   The `Content-Length` header value from the response.
  #   Falls back to the byte size of the HTML content if the header is missing.
  def content_length
    headers&.[]('content-length') || html.bytesize
  end

  # Returns the title of the page.
  # @return [String, nil] The page title or nil if not found.
  def title
    document.title
  end

  # Returns all links (<a> elements) on the page.
  # The wrapped elements are built once and memoized.
  # @return [Array<Reaxar::Element::A>] The array of link elements.
  def links
    @links ||= document.css('a[href]').map do |link|
      Reaxar::Element::A.new(link, self)
    end
  end

  # Returns the HTML content of the page.
  # @return [String] The HTML content.
  def html
    document.to_html
  end

  # Finds a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # returns nil for every selector.
  # @param selector [String] CSS selector for the form (default: 'form').
  # @return [Object, nil] The form element or nil if not found.
  def form(selector = 'form')
    # Form handling implementation goes here (can be extended).
  end

  # Submits a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # returns nil and submits nothing.
  # @param selector [String] CSS selector for the form.
  # @param data [Hash] Data to submit with the form.
  # @return [Object] The result of the form submission.
  def submit_form(selector, data = {})
    # Form submission implementation goes here.
  end
end

#response ⇒ Object (readonly)

Returns the HTTP response object.

Returns:

  • (Object)

    The HTTP response object.



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/reaxar/page.rb', line 22

# A fetched web page: the URL it was requested from, the HTTP client and
# response used to fetch it, and the document parsed from the response body.
class Page
  attr_reader :url, :client, :response, :document

  class << self
    # @!attribute [rw] logger
    #   @return [Logger, nil] The logger instance used by the client.
    attr_accessor :logger

    # Configures the logger for the Page class.
    # @param logger [Logger] The logger to use.
    # @return [void]
    def configure(logger:)
      self.logger = logger
    end
  end

  # Opens a page asynchronously.
  # @param url [String] The URL to open.
  # @param client [Client, nil] Optional HTTP client.
  # @yield [page] Optional block to yield the page instance.
  # @yieldparam page [Page] The page instance.
  # @return [Async::Task] The async task wrapping the page.
  def self.open(url, client = nil, &block)
    Async { new(url, client, &block) }
  end

  # Closes the page's HTTP client.
  #
  # @return [void]
  def close
    @client.close
  end

  # Initializes a new Page instance: registers the cookie and redirect
  # middleware on the client, fetches +url+ and parses the response body.
  #
  # @param url [String] The URL of the page.
  # @param client [Client, nil] Optional HTTP client. When omitted, a new
  #   client is created with the class-level logger.
  # @yield [self] Optional block to yield the page instance.
  # @raise [StandardError] Re-raises any error from middleware setup, the
  #   request or parsing. A client created by this constructor is closed first.
  def initialize(url, client = nil)
    @url = url
    # Only a client created here is ours to clean up; a caller-supplied client
    # stays under the caller's control even when construction fails.
    owns_client = !client
    @client = client || Client.new(logger: self.class.logger)

    begin
      @client.use Reaxar::Middleware::Cookies, jar_file: './cookies.yml'
      @client.use Reaxar::Middleware::Redirect
      @response = @client.get(url)
      @document = Reaxar::Parser::FactoryParser.call(content: @response.read,
                                                     mime_type: @response.headers['content-type'])
    rescue StandardError
      # `new` never returns the instance on failure, so nobody else could
      # close the internally created client.
      @client.close if owns_client
      raise
    end

    yield self if block_given?
  end

  # @return [Hash{String => String}, nil]
  #   The response headers as a hash with lowercase keys, or nil if no response is set.
  #   Headers are memoized after the first call.
  def headers
    @headers ||= @response&.headers&.to_h&.transform_keys(&:downcase)
  end

  # @return [String, nil]
  #   The `Content-Type` header value from the response, or nil if not present.
  def content_type
    headers&.[]('content-type')
  end

  # @return [String, Integer]
  #   The `Content-Length` header value from the response.
  #   Falls back to the byte size of the HTML content if the header is missing.
  def content_length
    headers&.[]('content-length') || html.bytesize
  end

  # Returns the title of the page.
  # @return [String, nil] The page title or nil if not found.
  def title
    document.title
  end

  # Returns all links (<a> elements) on the page.
  # The wrapped elements are built once and memoized.
  # @return [Array<Reaxar::Element::A>] The array of link elements.
  def links
    @links ||= document.css('a[href]').map do |link|
      Reaxar::Element::A.new(link, self)
    end
  end

  # Returns the HTML content of the page.
  # @return [String] The HTML content.
  def html
    document.to_html
  end

  # Finds a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # returns nil for every selector.
  # @param selector [String] CSS selector for the form (default: 'form').
  # @return [Object, nil] The form element or nil if not found.
  def form(selector = 'form')
    # Form handling implementation goes here (can be extended).
  end

  # Submits a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # returns nil and submits nothing.
  # @param selector [String] CSS selector for the form.
  # @param data [Hash] Data to submit with the form.
  # @return [Object] The result of the form submission.
  def submit_form(selector, data = {})
    # Form submission implementation goes here.
  end
end

#url ⇒ String (readonly)

Returns the URL of the page.

Returns:

  • (String)

    The URL of the page.



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/reaxar/page.rb', line 22

# A fetched web page: the URL it was requested from, the HTTP client and
# response used to fetch it, and the document parsed from the response body.
class Page
  attr_reader :url, :client, :response, :document

  class << self
    # @!attribute [rw] logger
    #   @return [Logger, nil] The logger instance used by the client.
    attr_accessor :logger

    # Configures the logger for the Page class.
    # @param logger [Logger] The logger to use.
    # @return [void]
    def configure(logger:)
      self.logger = logger
    end
  end

  # Opens a page asynchronously.
  # @param url [String] The URL to open.
  # @param client [Client, nil] Optional HTTP client.
  # @yield [page] Optional block to yield the page instance.
  # @yieldparam page [Page] The page instance.
  # @return [Async::Task] The async task wrapping the page.
  def self.open(url, client = nil, &block)
    Async { new(url, client, &block) }
  end

  # Closes the page's HTTP client.
  #
  # @return [void]
  def close
    @client.close
  end

  # Initializes a new Page instance: registers the cookie and redirect
  # middleware on the client, fetches +url+ and parses the response body.
  #
  # @param url [String] The URL of the page.
  # @param client [Client, nil] Optional HTTP client. When omitted, a new
  #   client is created with the class-level logger.
  # @yield [self] Optional block to yield the page instance.
  # @raise [StandardError] Re-raises any error from middleware setup, the
  #   request or parsing. A client created by this constructor is closed first.
  def initialize(url, client = nil)
    @url = url
    # Only a client created here is ours to clean up; a caller-supplied client
    # stays under the caller's control even when construction fails.
    owns_client = !client
    @client = client || Client.new(logger: self.class.logger)

    begin
      @client.use Reaxar::Middleware::Cookies, jar_file: './cookies.yml'
      @client.use Reaxar::Middleware::Redirect
      @response = @client.get(url)
      @document = Reaxar::Parser::FactoryParser.call(content: @response.read,
                                                     mime_type: @response.headers['content-type'])
    rescue StandardError
      # `new` never returns the instance on failure, so nobody else could
      # close the internally created client.
      @client.close if owns_client
      raise
    end

    yield self if block_given?
  end

  # @return [Hash{String => String}, nil]
  #   The response headers as a hash with lowercase keys, or nil if no response is set.
  #   Headers are memoized after the first call.
  def headers
    @headers ||= @response&.headers&.to_h&.transform_keys(&:downcase)
  end

  # @return [String, nil]
  #   The `Content-Type` header value from the response, or nil if not present.
  def content_type
    headers&.[]('content-type')
  end

  # @return [String, Integer]
  #   The `Content-Length` header value from the response.
  #   Falls back to the byte size of the HTML content if the header is missing.
  def content_length
    headers&.[]('content-length') || html.bytesize
  end

  # Returns the title of the page.
  # @return [String, nil] The page title or nil if not found.
  def title
    document.title
  end

  # Returns all links (<a> elements) on the page.
  # The wrapped elements are built once and memoized.
  # @return [Array<Reaxar::Element::A>] The array of link elements.
  def links
    @links ||= document.css('a[href]').map do |link|
      Reaxar::Element::A.new(link, self)
    end
  end

  # Returns the HTML content of the page.
  # @return [String] The HTML content.
  def html
    document.to_html
  end

  # Finds a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # returns nil for every selector.
  # @param selector [String] CSS selector for the form (default: 'form').
  # @return [Object, nil] The form element or nil if not found.
  def form(selector = 'form')
    # Form handling implementation goes here (can be extended).
  end

  # Submits a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # returns nil and submits nothing.
  # @param selector [String] CSS selector for the form.
  # @param data [Hash] Data to submit with the form.
  # @return [Object] The result of the form submission.
  def submit_form(selector, data = {})
    # Form submission implementation goes here.
  end
end

Class Method Details

.configure(logger:) ⇒ void

This method returns an undefined value.

Configures the logger for the Page class.

Parameters:

  • logger (Logger)

    The logger to use.



33
34
35
# File 'lib/reaxar/page.rb', line 33

# Configures the logger for the Page class by assigning it through the
# `logger=` writer.
# @param logger [Logger] The logger to use.
# @return [void]
def configure(logger:)
  self.logger = logger
end

.open(url, client = nil) {|page| ... } ⇒ Async::Task

Opens a page asynchronously.

Parameters:

  • url (String)

    The URL to open.

  • client (Client, nil) (defaults to: nil)

    Optional HTTP client.

Yields:

  • (page)

    Optional block to yield the page instance.

Yield Parameters:

  • page (Page)

    The page instance.

Returns:

  • (Async::Task)

    The async task wrapping the page.



44
45
46
# File 'lib/reaxar/page.rb', line 44

# Opens a page asynchronously: constructs the page inside an Async block and
# forwards the optional block to the constructor.
# @param url [String] The URL to open.
# @param client [Client, nil] Optional HTTP client.
# @yield [page] Optional block to yield the page instance.
# @yieldparam page [Page] The page instance.
# @return [Async::Task] The async task wrapping the page.
def self.open(url, client = nil, &block)
  Async { new(url, client, &block) }
end

Instance Method Details

#close ⇒ void

This method returns an undefined value.

Closes the page’s HTTP client.



51
52
53
# File 'lib/reaxar/page.rb', line 51

# Closes the page's HTTP client by delegating to the client's own `close`.
# @return [void]
def close
  @client.close
end

#content_length ⇒ String, Integer

Returns the `Content-Length` header value from the response. Falls back to the byte size of the HTML content if the header is missing.

Returns:

  • (String, Integer)

    The `Content-Length` header value from the response. Falls back to the byte size of the HTML content if the header is missing.



87
88
89
# File 'lib/reaxar/page.rb', line 87

# Reports the size of the page content.
# @return [String, Integer]
#   The `content-length` header value when the header is present; otherwise
#   the byte size of the rendered HTML.
def content_length
  response_headers = headers
  declared = response_headers.nil? ? nil : response_headers['content-length']
  return declared if declared

  html.bytesize
end

#content_type ⇒ String?

Returns the `Content-Type` header value from the response, or nil if not present.

Returns:

  • (String, nil)

    The `Content-Type` header value from the response, or nil if not present.



80
81
82
# File 'lib/reaxar/page.rb', line 80

# Looks up the `content-type` entry in the lowercased response headers.
# @return [String, nil]
#   The header value, or nil when there are no headers or no such entry.
def content_type
  response_headers = headers
  return nil if response_headers.nil?

  response_headers['content-type']
end

#form(selector = 'form') ⇒ Object?

Finds a form on the page.

Parameters:

  • selector (String) (defaults to: 'form')

    CSS selector for the form (default: ‘form’).

Returns:

  • (Object, nil)

    The form element or nil if not found.



114
115
116
# File 'lib/reaxar/page.rb', line 114

# Finds a form on the page.
# NOTE(review): not implemented yet; the body is empty, so this currently
# returns nil for every selector.
# @param selector [String] CSS selector for the form (default: 'form').
# @return [Object, nil] The form element or nil if not found.
def form(selector = 'form')
  # Form handling implementation goes here (can be extended).
end

#headers ⇒ Hash{String => String}?

Returns the response headers as a hash with lowercase keys, or nil if no response is set. Headers are memoized after the first call.

Returns:

  • (Hash{String => String}, nil)

    The response headers as a hash with lowercase keys, or nil if no response is set. Headers are memoized after the first call.



74
75
76
# File 'lib/reaxar/page.rb', line 74

# Response headers converted to a hash whose keys are lowercased.
# The converted hash is cached in @headers after the first successful call.
# @return [Hash{String => String}, nil]
#   The lowercased headers, or nil when no response (or no headers) is set.
def headers
  return @headers if @headers

  raw_headers = @response&.headers
  @headers = raw_headers&.to_h&.transform_keys(&:downcase)
end

#html ⇒ String

Returns the HTML content of the page.

Returns:

  • (String)

    The HTML content.



107
108
109
# File 'lib/reaxar/page.rb', line 107

# Returns the HTML content of the page by serializing the parsed document
# with `to_html`.
# @return [String] The HTML content.
def html
  document.to_html
end

#links ⇒ Array<Reaxar::Element::A>

Returns all links (<a> elements) on the page.

Returns:

  • (Array<Reaxar::Element::A>)

    The array of link elements.



99
100
101
102
103
# File 'lib/reaxar/page.rb', line 99

# Collects every `a[href]` node in the document and wraps each one in a
# Reaxar::Element::A bound to this page. The result is cached in @links.
# @return [Array<Reaxar::Element::A>] The array of link elements.
def links
  return @links if @links

  anchors = document.css('a[href]')
  @links = anchors.map { |anchor| Reaxar::Element::A.new(anchor, self) }
end

#submit_form(selector, data = {}) ⇒ Object

Submits a form on the page.

Parameters:

  • selector (String)

    CSS selector for the form.

  • data (Hash) (defaults to: {})

    Data to submit with the form.

Returns:

  • (Object)

    The result of the form submission.



122
123
124
# File 'lib/reaxar/page.rb', line 122

# Submits a form on the page.
# NOTE(review): not implemented yet; the body is empty, so this currently
# returns nil and submits nothing.
# @param selector [String] CSS selector for the form.
# @param data [Hash] Data to submit with the form.
# @return [Object] The result of the form submission.
def submit_form(selector, data = {})
  # Form submission implementation goes here.
end

#title ⇒ String?

Returns the title of the page.

Returns:

  • (String, nil)

    The page title or nil if not found.



93
94
95
# File 'lib/reaxar/page.rb', line 93

# Returns the title of the page by delegating to the parsed document.
# @return [String, nil] The page title or nil if not found.
def title
  document.title
end