Class: RubyCrawl::Result

Inherits:
Object
  • Object
show all
Defined in:
lib/rubycrawl/result.rb

Overview

Result object with lazy clean_markdown conversion.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text:, html:, links:, metadata:) ⇒ Result

Returns a new instance of Result.



8
9
10
11
12
13
# File 'lib/rubycrawl/result.rb', line 8

# Builds a result by storing each keyword argument in the instance variable
# of the same name. No validation or copying is performed.
#
# @param text [Object] value stored in @text
# @param html [Object] value stored in @html
# @param links [Object] value stored in @links
# @param metadata [Object] value stored in @metadata
#   (presumably a Hash with String keys such as 'final_url' — TODO confirm)
def initialize(text:, html:, links:, metadata:)
  @text = text
  @html = html
  @links = links
  @metadata = metadata
end

Instance Attribute Details

#html ⇒ Object (readonly)

Returns the value of attribute html.



6
7
8
# File 'lib/rubycrawl/result.rb', line 6

# Read-only accessor for the html attribute.
#
# @return [Object] whatever is currently held in @html
def html = @html

#links ⇒ Object (readonly)

Returns the value of attribute links.



6
7
8
# File 'lib/rubycrawl/result.rb', line 6

# Read-only accessor for the links attribute.
#
# @return [Object] whatever is currently held in @links
def links = @links

#metadata ⇒ Object (readonly)

Returns the value of attribute metadata.



6
7
8
# File 'lib/rubycrawl/result.rb', line 6

# Read-only accessor for the metadata attribute.
#
# @return [Object] whatever is currently held in @metadata
def metadata
  @metadata
end

#text ⇒ Object (readonly)

Returns the value of attribute text.



6
7
8
# File 'lib/rubycrawl/result.rb', line 6

# Read-only accessor for the text attribute.
#
# @return [Object] whatever is currently held in @text
def text = @text

Instance Method Details

#clean_markdown ⇒ String

Returns clean markdown converted from the page HTML. Relative URLs are resolved using the page’s final_url.

Returns:

  • (String)

    Markdown content with absolute URLs



19
20
21
# File 'lib/rubycrawl/result.rb', line 19

# Markdown rendition of the page HTML, computed on first use.
#
# The first call hands +html+ to MarkdownConverter.convert with +final_url+
# as the +base_url+ and caches the result in @clean_markdown; later calls
# return the cached value. A nil/false conversion result is not cached, so
# the conversion is attempted again on the next call.
#
# @return [String] markdown content with absolute URLs
def clean_markdown
  return @clean_markdown if @clean_markdown

  @clean_markdown = MarkdownConverter.convert(html, base_url: final_url)
end

#clean_markdown? ⇒ Boolean

Check if clean_markdown has been computed.

Returns:

  • (Boolean)


33
34
35
# File 'lib/rubycrawl/result.rb', line 33

# Reports whether a markdown value is already cached in @clean_markdown,
# without triggering the conversion itself.
#
# @return [Boolean] false while @clean_markdown is nil, true otherwise
def clean_markdown?
  return false if @clean_markdown.nil?

  true
end

#final_url ⇒ String?

The final URL after redirects.

Returns:

  • (String, nil)


26
27
28
# File 'lib/rubycrawl/result.rb', line 26

# The final URL after redirects, read from the 'final_url' entry of the
# stored metadata.
#
# @return [String, nil] the URL, or nil when the metadata has no such entry
def final_url
  @metadata['final_url']
end

#to_h ⇒ Object



37
38
39
40
41
42
43
44
45
# File 'lib/rubycrawl/result.rb', line 37

# Hash representation of the result.
#
# text, html, links and metadata come from their reader methods.
# clean_markdown is taken straight from the instance variable, so building
# the hash never triggers the markdown conversion; the entry is nil until
# the markdown has been computed.
#
# @return [Hash{Symbol => Object}] keys :text, :html, :links, :metadata,
#   :clean_markdown
def to_h
  {
    text: text,
    html: html,
    links: links,
    metadata: metadata,
    clean_markdown: @clean_markdown
  }
end