Class: RubyCrawl::SiteCrawler::PageResult

Inherits:
Object
  • Object
show all
Defined in:
lib/rubycrawl/site_crawler.rb

Overview

Page result yielded to the block. Its clean_markdown is computed lazily, on the first call, and then cached.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url:, html:, links:, metadata:, depth:) ⇒ PageResult

Returns a new instance of PageResult.



12
13
14
15
16
17
18
# File 'lib/rubycrawl/site_crawler.rb', line 12

# Builds a page result, storing each keyword argument in the instance
# variable of the same name.
#
# @param url [Object] stored as @url
# @param html [Object] stored as @html
# @param links [Object] stored as @links
# @param metadata [Object] stored as @metadata
# @param depth [Object] stored as @depth
def initialize(url:, html:, links:, metadata:, depth:)
  @url = url
  @html = html
  @links = links
  @metadata = metadata
  @depth = depth
end

Instance Attribute Details

#depth ⇒ Object (readonly)

Returns the value of attribute depth.



10
11
12
# File 'lib/rubycrawl/site_crawler.rb', line 10

# Reader for @depth.
#
# @return [Object] the current value of @depth
def depth; @depth; end

#html ⇒ Object (readonly)

Returns the value of attribute html.



10
11
12
# File 'lib/rubycrawl/site_crawler.rb', line 10

# Reader for @html.
#
# @return [Object] the current value of @html
def html; @html; end

#links ⇒ Object (readonly)

Returns the value of attribute links.



10
11
12
# File 'lib/rubycrawl/site_crawler.rb', line 10

# Reader for @links.
#
# @return [Object] the current value of @links
def links; @links; end

#metadata ⇒ Object (readonly)

Returns the value of attribute metadata.



10
11
12
# File 'lib/rubycrawl/site_crawler.rb', line 10

# Reader for @metadata.
#
# @return [Object] the current value of @metadata
def metadata
  @metadata
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



10
11
12
# File 'lib/rubycrawl/site_crawler.rb', line 10

# Reader for @url.
#
# @return [Object] the current value of @url
def url; @url; end

Instance Method Details

#clean_markdown ⇒ Object

Returns clean markdown converted from the page HTML. Relative URLs are resolved using the page’s final_url.



22
23
24
# File 'lib/rubycrawl/site_crawler.rb', line 22

# Converts the page HTML to markdown via MarkdownConverter.convert,
# passing final_url as the base URL, and caches the result in
# @clean_markdown.
#
# A falsy cached value (nil/false) is not treated as cached, so the
# conversion is attempted again on the next call.
#
# @return [Object] whatever MarkdownConverter.convert returns
def clean_markdown
  return @clean_markdown if @clean_markdown

  @clean_markdown = MarkdownConverter.convert(html, base_url: final_url)
end

#final_url ⇒ Object

The final URL after redirects.



27
28
29
# File 'lib/rubycrawl/site_crawler.rb', line 27

# The final URL after redirects.
#
# Looks up the 'final_url' key in metadata and falls back to url when
# that entry is missing or falsy.
# NOTE(review): assumes metadata responds to #[] with String keys
# (e.g. a Hash) — confirm against the code that builds PageResult.
#
# @return [Object] metadata['final_url'] if truthy, otherwise url
def final_url
  metadata['final_url'] || url
end