Class: Botz::HtmlScraperMacro

Inherits:
Object
  • Object
show all
Includes:
ActiveModel::Attributes, ActiveModel::Model
Defined in:
lib/botz/html_scraper_macro.rb

Overview

DSL for parsing HTML into objects.

Defined Under Namespace

Classes: Error

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(scraper_class, resource, writer) ⇒ HtmlScraperMacro

Returns a new instance of HtmlScraperMacro.



24
25
26
27
28
29
# File 'lib/botz/html_scraper_macro.rb', line 24

# Binds a scraper instance to one fetched resource.
#
# @param scraper_class [Object] stored unchanged; returned by #scraper_class
# @param resource [#uri] stored unchanged as #html; its +uri+ is stored as #url
# @param writer [#call] stored unchanged; invoked by #save with the record hash
# NOTE(review): +super+ is not called here - confirm that none of the included
#   ActiveModel modules depend on their own initializer having run.
def initialize(scraper_class, resource, writer)
  @scraper_class = scraper_class
  @writer = writer
  @html = resource
  @url = resource.uri
end

Instance Attribute Details

#html ⇒ Object (readonly)

Returns the value of attribute html.



21
22
23
# File 'lib/botz/html_scraper_macro.rb', line 21

# The resource given to the constructor; field readers run their
# +search+ queries against it.
#
# @return [Object] the stored resource, or nil if none was assigned
def html
  @html
end

#scraper_class ⇒ Object (readonly)

Returns the value of attribute scraper_class.



19
20
21
# File 'lib/botz/html_scraper_macro.rb', line 19

# The scraper class given to the constructor; #save passes it to Error
# when the record is invalid.
#
# @return [Object] the stored scraper class, or nil if none was assigned
def scraper_class
  @scraper_class
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



20
21
22
# File 'lib/botz/html_scraper_macro.rb', line 20

# The +uri+ of the resource, captured when the instance was constructed.
#
# @return [Object] the stored URI, or nil if none was assigned
def url
  @url
end

#writer ⇒ Object (readonly)

Returns the value of attribute writer.



22
23
24
# File 'lib/botz/html_scraper_macro.rb', line 22

# The writer given to the constructor; #save invokes it via +call+ with
# the record hash.
#
# @return [#call] the stored writer, or nil if none was assigned
def writer
  @writer
end

Class Method Details

.field(name, path = nil, persist: true, &block) ⇒ Object

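Defines an instance reader for a scraped field and, when persist is true, registers the field name and a validation for it. (Source comment: rubocop:disable Metrics/AbcSize, Metrics/MethodLength)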



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/botz/html_scraper_macro.rb', line 55

def self.field(name, path = nil, persist: true, &block)
  if persist
    field_names << name
    case name
    when /.*\?/
      validates name, inclusion: { in: [true, false] }
    else
      validates name, presence: true, allow_blank: true
    end
  end

  return define_method(name) { instance_exec(html, &block) } if path.nil?
  return define_method(name) { html.search(path).text.strip } if block.nil?

  define_method(name) { html.search(path).first.try { |e| instance_exec(e, &block) } }
end

.field_names ⇒ Object



32
33
34
# File 'lib/botz/html_scraper_macro.rb', line 32

# Names of the fields declared with persistence enabled. The list is created
# empty on first access and the same array is returned on every later call.
#
# @return [Array] the mutable list of registered field names
def field_names
  return @field_names if @field_names

  @field_names = []
end

Instance Method Details

#primary_key ⇒ Object



37
38
39
# File 'lib/botz/html_scraper_macro.rb', line 37

# Key identifying this record: the string form of #url.
#
# @return [String] +url.to_s+
def primary_key
  url.to_s
end

#save ⇒ Object



48
49
50
51
52
# File 'lib/botz/html_scraper_macro.rb', line 48

# Validates the scraped record and hands its hash form to the writer.
#
# @raise [Error] built from scraper_class, url and errors when +invalid?+ is true
# @return [Object] whatever +writer.call+ returns
def save
  raise Error.new(scraper_class, url, errors) if invalid?

  writer.call(to_h)
end

#to_h ⇒ Object



41
42
43
44
45
46
# File 'lib/botz/html_scraper_macro.rb', line 41

# Builds the record hash: one entry per registered field name (the value is
# obtained by calling the reader of the same name), followed by two timestamps
# taken from a single +Time.current+ reading.
#
# @return [Hash] field values plus +:fetched_on+ (beginning of the day) and
#   +:fetched_at+; the timestamps win if a field uses either key
def to_h
  now = Time.current
  record = self.class.field_names.each_with_object({}) do |field, memo|
    memo[field] = send(field)
  end
  record.merge(fetched_on: now.beginning_of_day, fetched_at: now)
end