Class: Reaxar::Page
- Inherits:
-
Object
- Object
- Reaxar::Page
- Defined in:
- lib/reaxar/page.rb
Overview
Represents a web page and provides methods for interacting with its content.
Class Attribute Summary collapse
-
.logger ⇒ Logger?
The logger instance used by the client.
Instance Attribute Summary collapse
-
#client ⇒ Client
readonly
The HTTP client used to fetch the page.
-
#document ⇒ Nokogiri::HTML::Document
readonly
The parsed HTML document.
-
#response ⇒ Object
readonly
The HTTP response object.
-
#url ⇒ String
readonly
The URL of the page.
Class Method Summary collapse
-
.configure(logger:) ⇒ void
Configures the logger for the Page class.
-
.open(url, client = nil) {|page| ... } ⇒ Async::Task
Opens a page asynchronously.
Instance Method Summary collapse
-
#close ⇒ void
Closes the page’s HTTP client.
-
#content_length ⇒ String, Integer
The `Content-Length` header value from the response.
-
#content_type ⇒ String?
The `Content-Type` header value from the response, or nil if not present.
-
#form(selector = 'form') ⇒ Object?
Finds a form on the page.
-
#headers ⇒ Hash{String => String}?
The response headers as a hash with lowercase keys, or nil if no response is set.
-
#html ⇒ String
Returns the HTML content of the page.
-
#initialize(url, client = nil) {|self| ... } ⇒ Page
constructor
Initializes a new Page instance.
-
#links ⇒ Array<Reaxar::Element::A>
Returns all links (<a> elements) on the page.
-
#submit_form(selector, data = {}) ⇒ Object
Submits a form on the page.
-
#title ⇒ String?
Returns the title of the page.
Constructor Details
#initialize(url, client = nil) {|self| ... } ⇒ Page
Initializes a new Page instance.
59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/reaxar/page.rb', line 59
# Initializes a new Page instance.
#
# Uses the given client, or creates a Client with the class-level logger when
# none is given. Registers the Cookies middleware (jar file './cookies.yml')
# and the Redirect middleware on it, performs a GET for +url+, and hands the
# response body plus its 'content-type' header to FactoryParser to build the
# document.
#
# @param url [String] The URL of the page.
# @param client [Client, nil] Optional HTTP client.
# @yield [self] Optional block to yield the page instance.
# @raise [StandardError] Re-raises any error from middleware registration,
#   the request, or parsing.
def initialize(url, client = nil)
  @url = url
  @client = client || Client.new(logger: self.class.logger)

  begin
    @client.use Reaxar::Middleware::Cookies, jar_file: './cookies.yml'
    @client.use Reaxar::Middleware::Redirect

    @response = @client.get(url)
    @document = Reaxar::Parser::FactoryParser.call(content: @response.read,
                                                   mime_type: @response.headers['content-type'])
  rescue StandardError
    # A client created here becomes unreachable once construction fails, so
    # release it. A caller-supplied client is left open: the caller owns it.
    @client.close unless client
    raise
  end

  # The block runs outside the protected region so that an error raised by
  # caller code never closes the client behind the caller's back.
  yield self if block_given?
end
Class Attribute Details
.logger ⇒ Logger?
Returns the logger instance used by the client.
28 29 30 |
# File 'lib/reaxar/page.rb', line 28
# Reader for the class-level logger.
# @return [Logger, nil] The logger instance used by the client.
def logger
  @logger
end
Instance Attribute Details
#client ⇒ Client (readonly)
Returns the HTTP client used to fetch the page.
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
# File 'lib/reaxar/page.rb', line 22
# Represents a web page and provides methods for interacting with its content.
class Page
  attr_reader :url, :client, :response, :document

  class << self
    # @!attribute [rw] logger
    # @return [Logger, nil] The logger instance used by the client.
    attr_accessor :logger

    # Configures the logger for the Page class.
    # @param logger [Logger] The logger to use.
    # @return [void]
    def configure(logger:)
      self.logger = logger
    end
  end

  # Opens a page asynchronously by constructing it inside an Async block.
  # @param url [String] The URL to open.
  # @param client [Client, nil] Optional HTTP client.
  # @yield [page] Optional block to yield the page instance.
  # @yieldparam page [Page] The page instance.
  # @return [Async::Task] The async task wrapping the page.
  def self.open(url, client = nil, &block)
    Async { new(url, client, &block) }
  end

  # Closes the page's HTTP client.
  #
  # @return [void]
  def close
    @client.close
  end

  # Initializes a new Page instance.
  #
  # Uses the given client, or creates a Client with the class-level logger
  # when none is given. Registers the Cookies middleware (jar file
  # './cookies.yml') and the Redirect middleware on it, performs a GET for
  # +url+, and hands the response body plus its 'content-type' header to
  # FactoryParser to build the document.
  #
  # @param url [String] The URL of the page.
  # @param client [Client, nil] Optional HTTP client.
  # @yield [self] Optional block to yield the page instance.
  # @raise [StandardError] Re-raises any error from middleware registration,
  #   the request, or parsing.
  def initialize(url, client = nil)
    @url = url
    @client = client || Client.new(logger: self.class.logger)

    begin
      @client.use Reaxar::Middleware::Cookies, jar_file: './cookies.yml'
      @client.use Reaxar::Middleware::Redirect

      @response = @client.get(url)
      @document = Reaxar::Parser::FactoryParser.call(content: @response.read,
                                                     mime_type: @response.headers['content-type'])
    rescue StandardError
      # A client created here becomes unreachable once construction fails, so
      # release it. A caller-supplied client is left open: the caller owns it.
      @client.close unless client
      raise
    end

    # The block runs outside the protected region so that an error raised by
    # caller code never closes the client behind the caller's back.
    yield self if block_given?
  end

  # @return [Hash{String => String}, nil]
  #   The response headers as a hash with lowercase keys, or nil if no response is set.
  #   Headers are memoized after the first call.
  def headers
    @headers ||= @response&.headers&.to_h&.transform_keys(&:downcase)
  end

  # @return [String, nil]
  #   The `Content-Type` header value from the response, or nil if not present.
  def content_type
    headers&.[]('content-type')
  end

  # @return [String, Integer]
  #   The `Content-Length` header value from the response.
  #   Falls back to the byte size of the serialized document (see #html) if
  #   the header is missing.
  def content_length
    headers&.[]('content-length') || html.bytesize
  end

  # Returns the title of the page.
  # @return [String, nil] The page title or nil if not found.
  def title
    document.title
  end

  # Returns all links (<a> elements with an href attribute) on the page.
  # The result is memoized after the first call.
  # @return [Array<Reaxar::Element::A>] The array of link elements.
  def links
    @links ||= document.css('a[href]').map do |link|
      Reaxar::Element::A.new(link, self)
    end
  end

  # Returns the HTML content of the page.
  # @return [String] The HTML content.
  def html
    document.to_html
  end

  # Finds a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # always returns nil.
  # @param selector [String] CSS selector for the form (default: 'form').
  # @return [Object, nil] The form element or nil if not found.
  def form(selector = 'form')
    # Form handling implementation (can be extended)
  end

  # Submits a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # always returns nil.
  # @param selector [String] CSS selector for the form.
  # @param data [Hash] Data to submit with the form.
  # @return [Object] The result of the form submission.
  def submit_form(selector, data = {})
    # Form submission implementation
  end
end
#document ⇒ Nokogiri::HTML::Document (readonly)
Returns the parsed HTML document.
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
# File 'lib/reaxar/page.rb', line 22
# Represents a web page and provides methods for interacting with its content.
class Page
  attr_reader :url, :client, :response, :document

  class << self
    # @!attribute [rw] logger
    # @return [Logger, nil] The logger instance used by the client.
    attr_accessor :logger

    # Configures the logger for the Page class.
    # @param logger [Logger] The logger to use.
    # @return [void]
    def configure(logger:)
      self.logger = logger
    end
  end

  # Opens a page asynchronously by constructing it inside an Async block.
  # @param url [String] The URL to open.
  # @param client [Client, nil] Optional HTTP client.
  # @yield [page] Optional block to yield the page instance.
  # @yieldparam page [Page] The page instance.
  # @return [Async::Task] The async task wrapping the page.
  def self.open(url, client = nil, &block)
    Async { new(url, client, &block) }
  end

  # Closes the page's HTTP client.
  #
  # @return [void]
  def close
    @client.close
  end

  # Initializes a new Page instance.
  #
  # Uses the given client, or creates a Client with the class-level logger
  # when none is given. Registers the Cookies middleware (jar file
  # './cookies.yml') and the Redirect middleware on it, performs a GET for
  # +url+, and hands the response body plus its 'content-type' header to
  # FactoryParser to build the document.
  #
  # @param url [String] The URL of the page.
  # @param client [Client, nil] Optional HTTP client.
  # @yield [self] Optional block to yield the page instance.
  # @raise [StandardError] Re-raises any error from middleware registration,
  #   the request, or parsing.
  def initialize(url, client = nil)
    @url = url
    @client = client || Client.new(logger: self.class.logger)

    begin
      @client.use Reaxar::Middleware::Cookies, jar_file: './cookies.yml'
      @client.use Reaxar::Middleware::Redirect

      @response = @client.get(url)
      @document = Reaxar::Parser::FactoryParser.call(content: @response.read,
                                                     mime_type: @response.headers['content-type'])
    rescue StandardError
      # A client created here becomes unreachable once construction fails, so
      # release it. A caller-supplied client is left open: the caller owns it.
      @client.close unless client
      raise
    end

    # The block runs outside the protected region so that an error raised by
    # caller code never closes the client behind the caller's back.
    yield self if block_given?
  end

  # @return [Hash{String => String}, nil]
  #   The response headers as a hash with lowercase keys, or nil if no response is set.
  #   Headers are memoized after the first call.
  def headers
    @headers ||= @response&.headers&.to_h&.transform_keys(&:downcase)
  end

  # @return [String, nil]
  #   The `Content-Type` header value from the response, or nil if not present.
  def content_type
    headers&.[]('content-type')
  end

  # @return [String, Integer]
  #   The `Content-Length` header value from the response.
  #   Falls back to the byte size of the serialized document (see #html) if
  #   the header is missing.
  def content_length
    headers&.[]('content-length') || html.bytesize
  end

  # Returns the title of the page.
  # @return [String, nil] The page title or nil if not found.
  def title
    document.title
  end

  # Returns all links (<a> elements with an href attribute) on the page.
  # The result is memoized after the first call.
  # @return [Array<Reaxar::Element::A>] The array of link elements.
  def links
    @links ||= document.css('a[href]').map do |link|
      Reaxar::Element::A.new(link, self)
    end
  end

  # Returns the HTML content of the page.
  # @return [String] The HTML content.
  def html
    document.to_html
  end

  # Finds a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # always returns nil.
  # @param selector [String] CSS selector for the form (default: 'form').
  # @return [Object, nil] The form element or nil if not found.
  def form(selector = 'form')
    # Form handling implementation (can be extended)
  end

  # Submits a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # always returns nil.
  # @param selector [String] CSS selector for the form.
  # @param data [Hash] Data to submit with the form.
  # @return [Object] The result of the form submission.
  def submit_form(selector, data = {})
    # Form submission implementation
  end
end
#response ⇒ Object (readonly)
Returns the HTTP response object.
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
# File 'lib/reaxar/page.rb', line 22
# Represents a web page and provides methods for interacting with its content.
class Page
  attr_reader :url, :client, :response, :document

  class << self
    # @!attribute [rw] logger
    # @return [Logger, nil] The logger instance used by the client.
    attr_accessor :logger

    # Configures the logger for the Page class.
    # @param logger [Logger] The logger to use.
    # @return [void]
    def configure(logger:)
      self.logger = logger
    end
  end

  # Opens a page asynchronously by constructing it inside an Async block.
  # @param url [String] The URL to open.
  # @param client [Client, nil] Optional HTTP client.
  # @yield [page] Optional block to yield the page instance.
  # @yieldparam page [Page] The page instance.
  # @return [Async::Task] The async task wrapping the page.
  def self.open(url, client = nil, &block)
    Async { new(url, client, &block) }
  end

  # Closes the page's HTTP client.
  #
  # @return [void]
  def close
    @client.close
  end

  # Initializes a new Page instance.
  #
  # Uses the given client, or creates a Client with the class-level logger
  # when none is given. Registers the Cookies middleware (jar file
  # './cookies.yml') and the Redirect middleware on it, performs a GET for
  # +url+, and hands the response body plus its 'content-type' header to
  # FactoryParser to build the document.
  #
  # @param url [String] The URL of the page.
  # @param client [Client, nil] Optional HTTP client.
  # @yield [self] Optional block to yield the page instance.
  # @raise [StandardError] Re-raises any error from middleware registration,
  #   the request, or parsing.
  def initialize(url, client = nil)
    @url = url
    @client = client || Client.new(logger: self.class.logger)

    begin
      @client.use Reaxar::Middleware::Cookies, jar_file: './cookies.yml'
      @client.use Reaxar::Middleware::Redirect

      @response = @client.get(url)
      @document = Reaxar::Parser::FactoryParser.call(content: @response.read,
                                                     mime_type: @response.headers['content-type'])
    rescue StandardError
      # A client created here becomes unreachable once construction fails, so
      # release it. A caller-supplied client is left open: the caller owns it.
      @client.close unless client
      raise
    end

    # The block runs outside the protected region so that an error raised by
    # caller code never closes the client behind the caller's back.
    yield self if block_given?
  end

  # @return [Hash{String => String}, nil]
  #   The response headers as a hash with lowercase keys, or nil if no response is set.
  #   Headers are memoized after the first call.
  def headers
    @headers ||= @response&.headers&.to_h&.transform_keys(&:downcase)
  end

  # @return [String, nil]
  #   The `Content-Type` header value from the response, or nil if not present.
  def content_type
    headers&.[]('content-type')
  end

  # @return [String, Integer]
  #   The `Content-Length` header value from the response.
  #   Falls back to the byte size of the serialized document (see #html) if
  #   the header is missing.
  def content_length
    headers&.[]('content-length') || html.bytesize
  end

  # Returns the title of the page.
  # @return [String, nil] The page title or nil if not found.
  def title
    document.title
  end

  # Returns all links (<a> elements with an href attribute) on the page.
  # The result is memoized after the first call.
  # @return [Array<Reaxar::Element::A>] The array of link elements.
  def links
    @links ||= document.css('a[href]').map do |link|
      Reaxar::Element::A.new(link, self)
    end
  end

  # Returns the HTML content of the page.
  # @return [String] The HTML content.
  def html
    document.to_html
  end

  # Finds a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # always returns nil.
  # @param selector [String] CSS selector for the form (default: 'form').
  # @return [Object, nil] The form element or nil if not found.
  def form(selector = 'form')
    # Form handling implementation (can be extended)
  end

  # Submits a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # always returns nil.
  # @param selector [String] CSS selector for the form.
  # @param data [Hash] Data to submit with the form.
  # @return [Object] The result of the form submission.
  def submit_form(selector, data = {})
    # Form submission implementation
  end
end
#url ⇒ String (readonly)
Returns the URL of the page.
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
# File 'lib/reaxar/page.rb', line 22
# Represents a web page and provides methods for interacting with its content.
class Page
  attr_reader :url, :client, :response, :document

  class << self
    # @!attribute [rw] logger
    # @return [Logger, nil] The logger instance used by the client.
    attr_accessor :logger

    # Configures the logger for the Page class.
    # @param logger [Logger] The logger to use.
    # @return [void]
    def configure(logger:)
      self.logger = logger
    end
  end

  # Opens a page asynchronously by constructing it inside an Async block.
  # @param url [String] The URL to open.
  # @param client [Client, nil] Optional HTTP client.
  # @yield [page] Optional block to yield the page instance.
  # @yieldparam page [Page] The page instance.
  # @return [Async::Task] The async task wrapping the page.
  def self.open(url, client = nil, &block)
    Async { new(url, client, &block) }
  end

  # Closes the page's HTTP client.
  #
  # @return [void]
  def close
    @client.close
  end

  # Initializes a new Page instance.
  #
  # Uses the given client, or creates a Client with the class-level logger
  # when none is given. Registers the Cookies middleware (jar file
  # './cookies.yml') and the Redirect middleware on it, performs a GET for
  # +url+, and hands the response body plus its 'content-type' header to
  # FactoryParser to build the document.
  #
  # @param url [String] The URL of the page.
  # @param client [Client, nil] Optional HTTP client.
  # @yield [self] Optional block to yield the page instance.
  # @raise [StandardError] Re-raises any error from middleware registration,
  #   the request, or parsing.
  def initialize(url, client = nil)
    @url = url
    @client = client || Client.new(logger: self.class.logger)

    begin
      @client.use Reaxar::Middleware::Cookies, jar_file: './cookies.yml'
      @client.use Reaxar::Middleware::Redirect

      @response = @client.get(url)
      @document = Reaxar::Parser::FactoryParser.call(content: @response.read,
                                                     mime_type: @response.headers['content-type'])
    rescue StandardError
      # A client created here becomes unreachable once construction fails, so
      # release it. A caller-supplied client is left open: the caller owns it.
      @client.close unless client
      raise
    end

    # The block runs outside the protected region so that an error raised by
    # caller code never closes the client behind the caller's back.
    yield self if block_given?
  end

  # @return [Hash{String => String}, nil]
  #   The response headers as a hash with lowercase keys, or nil if no response is set.
  #   Headers are memoized after the first call.
  def headers
    @headers ||= @response&.headers&.to_h&.transform_keys(&:downcase)
  end

  # @return [String, nil]
  #   The `Content-Type` header value from the response, or nil if not present.
  def content_type
    headers&.[]('content-type')
  end

  # @return [String, Integer]
  #   The `Content-Length` header value from the response.
  #   Falls back to the byte size of the serialized document (see #html) if
  #   the header is missing.
  def content_length
    headers&.[]('content-length') || html.bytesize
  end

  # Returns the title of the page.
  # @return [String, nil] The page title or nil if not found.
  def title
    document.title
  end

  # Returns all links (<a> elements with an href attribute) on the page.
  # The result is memoized after the first call.
  # @return [Array<Reaxar::Element::A>] The array of link elements.
  def links
    @links ||= document.css('a[href]').map do |link|
      Reaxar::Element::A.new(link, self)
    end
  end

  # Returns the HTML content of the page.
  # @return [String] The HTML content.
  def html
    document.to_html
  end

  # Finds a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # always returns nil.
  # @param selector [String] CSS selector for the form (default: 'form').
  # @return [Object, nil] The form element or nil if not found.
  def form(selector = 'form')
    # Form handling implementation (can be extended)
  end

  # Submits a form on the page.
  # NOTE(review): not implemented yet; the body is empty, so this currently
  # always returns nil.
  # @param selector [String] CSS selector for the form.
  # @param data [Hash] Data to submit with the form.
  # @return [Object] The result of the form submission.
  def submit_form(selector, data = {})
    # Form submission implementation
  end
end
Class Method Details
.configure(logger:) ⇒ void
This method returns an undefined value.
Configures the logger for the Page class.
33 34 35 |
# File 'lib/reaxar/page.rb', line 33
# Stores the given logger through the class-level logger writer.
# @param logger [Logger] The logger to use.
# @return [void]
def configure(logger:)
  self.logger = logger
end
.open(url, client = nil) {|page| ... } ⇒ Async::Task
Opens a page asynchronously.
44 45 46 |
# File 'lib/reaxar/page.rb', line 44
# Builds a page inside an Async block, forwarding the optional block to the
# constructor.
# @param url [String] The URL to open.
# @param client [Client, nil] Optional HTTP client.
# @yield [page] Optional block to yield the page instance.
# @yieldparam page [Page] The page instance.
# @return [Async::Task] The async task wrapping the page.
def self.open(url, client = nil, &block)
  Async do
    new(url, client, &block)
  end
end
Instance Method Details
#close ⇒ void
This method returns an undefined value.
Closes the page’s HTTP client.
51 52 53 |
# File 'lib/reaxar/page.rb', line 51
# Closes the HTTP client held by this page.
#
# @return [void]
def close
  @client.close
end
#content_length ⇒ String, Integer
Returns the `Content-Length` header value from the response. Falls back to the byte size of the HTML content if the header is missing.
87 88 89 |
# File 'lib/reaxar/page.rb', line 87
# @return [String, Integer]
#   The 'content-length' entry of #headers when one is present; otherwise
#   the byte size of the string returned by #html.
def content_length
  header_map = headers
  declared = header_map.nil? ? nil : header_map['content-length']
  declared || html.bytesize
end
#content_type ⇒ String?
Returns the `Content-Type` header value from the response, or nil if not present.
80 81 82 |
# File 'lib/reaxar/page.rb', line 80
# @return [String, nil]
#   The 'content-type' entry of #headers, or nil when there are no headers
#   or the entry is absent.
def content_type
  header_map = headers
  return nil if header_map.nil?

  header_map['content-type']
end
#form(selector = 'form') ⇒ Object?
Finds a form on the page.
114 115 116 |
# File 'lib/reaxar/page.rb', line 114 def form(selector = 'form') # Реализация работы с формами (можно расширить) end |
#headers ⇒ Hash{String => String}?
Returns the response headers as a hash with lowercase keys, or nil if no response is set. Headers are memoized after the first call.
74 75 76 |
# File 'lib/reaxar/page.rb', line 74
# @return [Hash{String => String}, nil]
#   The response headers converted to a hash whose keys are downcased, or
#   nil when there is no response (or it exposes no headers).
#   A non-nil result is cached in @headers and reused on later calls.
def headers
  return @headers if @headers

  raw = @response&.headers
  @headers = raw&.to_h&.transform_keys(&:downcase)
end
#html ⇒ String
Returns the HTML content of the page.
107 108 109 |
# File 'lib/reaxar/page.rb', line 107
# Serializes the parsed document back to markup via its #to_html.
# @return [String] The HTML content.
def html
  document.to_html
end
#links ⇒ Array<Reaxar::Element::A>
Returns all links (<a> elements) on the page.
99 100 101 102 103 |
# File 'lib/reaxar/page.rb', line 99
# Wraps every node matched by the 'a[href]' CSS selector in a
# Reaxar::Element::A bound to this page. The array is cached in @links.
# @return [Array<Reaxar::Element::A>] The array of link elements.
def links
  return @links if @links

  anchors = document.css('a[href]')
  @links = anchors.map { |anchor| Reaxar::Element::A.new(anchor, self) }
end
#submit_form(selector, data = {}) ⇒ Object
Submits a form on the page.
122 123 124 |
# File 'lib/reaxar/page.rb', line 122 def submit_form(selector, data = {}) # Реализация отправки формы end |
#title ⇒ String?
Returns the title of the page.
93 94 95 |
# File 'lib/reaxar/page.rb', line 93
# Delegates to the parsed document's #title.
# @return [String, nil] The page title or nil if not found.
def title
  document.title
end