Class: Botz::Scraper::Html

Inherits:
Object
  • Object
show all
Includes:
ActiveModel::Attributes, ActiveModel::Model
Defined in:
lib/botz/scraper/html.rb

Overview

DSL for parsing html into objects

Defined Under Namespace

Classes: Error

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(scraper_class, resource) ⇒ Html

Returns a new instance of Html.



23
24
25
26
27
# File 'lib/botz/scraper/html.rb', line 23

# Builds a scraper instance around a fetched resource.
#
# Stores the scraper class and the resource itself (exposed as +html+), and
# remembers the resource's +uri+ as +url+.
#
# NOTE(review): no +super+ call is made here even though the class includes
# ActiveModel modules; confirm none of them rely on their own initializer.
#
# @param scraper_class [Object] stored as-is in +@scraper_class+
# @param resource [#uri] fetched document; must respond to +uri+
def initialize(scraper_class, resource)
  @scraper_class = scraper_class
  @html = resource
  @url = @html.uri
end

Instance Attribute Details

#html ⇒ Object (readonly)

Returns the value of attribute html.



21
22
23
# File 'lib/botz/scraper/html.rb', line 21

# Reader for +@html+, which the constructor sets to the +resource+ it was
# given.
#
# @return [Object] the stored resource
def html
  @html
end

#scraper_class ⇒ Object (readonly)

Returns the value of attribute scraper_class.



19
20
21
# File 'lib/botz/scraper/html.rb', line 19

# Reader for +@scraper_class+, which the constructor sets from its first
# argument.
#
# @return [Object] the stored scraper class
def scraper_class
  @scraper_class
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



20
21
22
# File 'lib/botz/scraper/html.rb', line 20

# Reader for +@url+, which the constructor sets to +resource.uri+.
#
# @return [Object] the URI of the resource this instance was built from
def url
  @url
end

Class Method Details

.field(name, path = nil, persist: true, &block) ⇒ Object

rubocop:disable Metrics/AbcSize, Metrics/MethodLength



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/botz/scraper/html.rb', line 53

# Declares a scraped field and defines an instance method +name+ that
# extracts its value from +html+.
#
# How the value is extracted depends on which of +path+ and +block+ are given:
# * block only - the block is +instance_exec+'d with the whole +html+ object;
# * path only  - the stripped text of +html.search(path)+ is returned;
# * both       - the block is +instance_exec+'d with the first result of
#   +html.search(path)+; the method returns nil when there is no first result.
#
# When +persist+ is true the name is appended to +field_names+ (so it is
# included in +to_h+) and a validation is registered: names ending in "?" must
# be +true+ or +false+, every other name gets a presence validation with
# +allow_blank: true+.
#
# @param name [Symbol, String] name of the generated instance method
# @param path [Object, nil] selector handed to +html.search+
# @param persist [Boolean] whether the field is recorded in +field_names+ and validated
# @yield [node] optional extractor, evaluated in the context of the instance
# @return [Object] whatever +define_method+ returns
# @raise [ArgumentError] if neither +path+ nor a block is given
def self.field(name, path = nil, persist: true, &block)
  # Fail at declaration time, before touching field_names: without this the
  # generated method would only blow up later, when it is first called.
  if path.nil? && block.nil?
    raise ArgumentError, "field #{name.inspect} requires a path, a block, or both"
  end

  if persist
    field_names << name
    # Predicate fields are identified by a trailing "?".
    if name.to_s.end_with?('?')
      validates name, inclusion: { in: [true, false] }
    else
      # NOTE(review): presence combined with allow_blank: true looks like it
      # never rejects a value - confirm this is intentional.
      validates name, presence: true, allow_blank: true
    end
  end

  return define_method(name) { instance_exec(html, &block) } if path.nil?
  return define_method(name) { html.search(path).text.strip } if block.nil?

  define_method(name) do
    node = html.search(path).first
    # No match: return nil instead of running the extractor.
    instance_exec(node, &block) unless node.nil?
  end
end

.field_names ⇒ Object



30
31
32
# File 'lib/botz/scraper/html.rb', line 30

# Lazily created list held in +@field_names+; the same array object is
# returned on every call, so callers can append to it.
#
# @return [Array] the memoized list
def field_names
  return @field_names if @field_names

  @field_names = []
end

Instance Method Details

#call {|to_h| ... } ⇒ Object

Yields:



46
47
48
49
50
# File 'lib/botz/scraper/html.rb', line 46

# Validates the instance and hands its hash form to the caller's block.
#
# @yield [to_h] the hash produced by +to_h+
# @return [Object] the block's return value
# @raise [Error] built from +scraper_class+, +url+ and +errors+ when
#   +invalid?+ is true; the block is not run in that case
def call
  if invalid?
    failure = Error.new(scraper_class, url, errors)
    raise failure
  end

  yield to_h
end

#primary_key ⇒ Object



35
36
37
# File 'lib/botz/scraper/html.rb', line 35

# Identifier for this scraped page: the string form of +url+.
#
# @return [String] result of +url.to_s+
def primary_key
  url.to_s
end

#to_h ⇒ Object



39
40
41
42
43
44
# File 'lib/botz/scraper/html.rb', line 39

# Serializes the instance into a hash: one entry per name in the class's
# +field_names+ (value obtained by calling the method of that name), followed
# by +:fetched_on+ and +:fetched_at+.
#
# The timestamps are captured before any field is evaluated, and they
# overwrite a field that happens to use the same key.
#
# @return [Hash] field values plus +:fetched_on+ / +:fetched_at+
def to_h
  now = Time.current
  stamps = { fetched_on: now.beginning_of_day, fetched_at: now }

  values = self.class.field_names.each_with_object({}) do |field_name, collected|
    collected[field_name] = send(field_name)
  end

  values.merge(stamps)
end