Class: Deepsearch::Engine::Steps::DataAggregation::ParsedWebsite

Inherits:
Object
  • Object
show all
Defined in:
lib/deepsearch/engine/steps/data_aggregation/parsed_website.rb

Overview

Fetches content from a URL, parses it, and cleans it to extract meaningful text. It handles HTTP requests, content type detection, and removal of unwanted HTML elements.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url:) ⇒ ParsedWebsite

Returns a new instance of ParsedWebsite.



16
17
18
19
20
21
22
# File 'lib/deepsearch/engine/steps/data_aggregation/parsed_website.rb', line 16

# Stores the URL, resets the result state, and then runs fetch_content!.
#
# The result state starts out as "nothing fetched": no content, no error,
# and success set to false. fetch_content! is called last, after those
# defaults are in place.
#
# @param url [Object] the value stored in @url and exposed through #url
def initialize(url:)
  @url = url
  @content = nil
  @success = false
  @error = nil
  # NOTE(review): fetch_content! is defined outside this snippet; presumably
  # it populates @content/@success/@error — confirm against its definition.
  fetch_content!
end

Instance Attribute Details

#content ⇒ Object (readonly)

Returns the value of attribute content.



14
15
16
# File 'lib/deepsearch/engine/steps/data_aggregation/parsed_website.rb', line 14

# Reader for the content attribute.
#
# @return [Object] the current value of @content
def content
  @content
end

#error ⇒ Object (readonly)

Returns the value of attribute error.



14
15
16
# File 'lib/deepsearch/engine/steps/data_aggregation/parsed_website.rb', line 14

# Reader for the error attribute.
#
# @return [Object] the current value of @error
def error
  @error
end

#metadata ⇒ Object (readonly)

Returns the value of attribute metadata.



14
15
16
# File 'lib/deepsearch/engine/steps/data_aggregation/parsed_website.rb', line 14

# Reader for the metadata attribute.
#
# @return [Object] the current value of @metadata
def metadata
  @metadata
end

#success ⇒ Object (readonly)

Returns the value of attribute success.



14
15
16
# File 'lib/deepsearch/engine/steps/data_aggregation/parsed_website.rb', line 14

# Reader for the success attribute.
#
# @return [Object] the current value of @success
def success
  @success
end

#timestamp ⇒ Object (readonly)

Returns the value of attribute timestamp.



14
15
16
# File 'lib/deepsearch/engine/steps/data_aggregation/parsed_website.rb', line 14

# Reader for the timestamp attribute.
#
# @return [Object] the current value of @timestamp
def timestamp
  @timestamp
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



14
15
16
# File 'lib/deepsearch/engine/steps/data_aggregation/parsed_website.rb', line 14

# Reader for the url attribute.
#
# @return [Object] the current value of @url
def url
  @url
end

Instance Method Details

#size ⇒ Object



28
29
30
# File 'lib/deepsearch/engine/steps/data_aggregation/parsed_website.rb', line 28

# Length of the content once rendered as a String.
#
# @return [Integer] size of content.to_s; 0 when content is nil, because
#   nil.to_s is the empty string
def size
  text = content.to_s
  text.length
end

#success? ⇒ Boolean

Returns:

  • (Boolean)


24
25
26
# File 'lib/deepsearch/engine/steps/data_aggregation/parsed_website.rb', line 24

# Predicate form of the success attribute.
#
# @return [Boolean] the current value of @success, returned as-is
def success?
  @success
end

#to_h ⇒ Object



32
33
34
35
36
37
38
39
# File 'lib/deepsearch/engine/steps/data_aggregation/parsed_website.rb', line 32

# Serializes this result into a plain Hash.
#
# @return [Hash{Symbol => Object}] the values of url, success?, error and
#   content under the keys :url, :success, :error and :content, in that order
def to_h
  { url: url, success: success?, error: error, content: content }
end