Class: Digger::Pattern

Inherits:
Object
  • Object
show all
Defined in:
lib/digger/pattern.rb

Overview

Extractor patterns definition

Constant Summary collapse

# Highest capture-group index that gets its own "match_N" type.
MATCH_MAX =
3
# Regular-expression types. The position of "match_N" in this list equals N;
# regexp_match uses that position as the capture-group index.
TYPES_REGEXP =
(0.upto(MATCH_MAX).map { |i| "match_#{i}" } + %w[match_many match_all]).freeze
# CSS-selector types.
TYPES_CSS =
%w[css_one css_many css_all].freeze
# JSON types; json_match sends the type name to the page as a method.
TYPES_JSON =
%w[json jsonp].freeze
# Remaining types; match_page dispatches each to the matching get_<type> method.
TYPES_OTHER =
%w[cookie plain lines header body].freeze
# Every supported pattern type.
TYPES =
(TYPES_REGEXP + TYPES_CSS + TYPES_JSON + TYPES_OTHER).freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(hash = {}) ⇒ Pattern

Returns a new instance of Pattern.



8
9
10
11
12
# File 'lib/digger/pattern.rb', line 8

# Builds a pattern from a hash of attributes.
#
# Only keys whose string form is "type", "value" or "block" are applied, each
# through its writer method; every other key is silently ignored.
#
# @param hash [Hash] attribute names mapped to the values to assign
def initialize(hash = {})
  hash.each_pair do |name, setting|
    next unless %w[type value block].include?(name.to_s)

    send("#{name}=", setting)
  end
end

Instance Attribute Details

#block ⇒ Object

Returns the value of attribute block.



6
7
8
# File 'lib/digger/pattern.rb', line 6

# Reader for the +block+ attribute, which safe_block inspects to decide what
# post-processing callable to hand back.
#
# @return [Object, nil] the stored block, or nil when none was assigned
def block
  instance_variable_get(:@block)
end

#type ⇒ Object

Returns the value of attribute type.



6
7
8
# File 'lib/digger/pattern.rb', line 6

# Reader for the +type+ attribute, the name match_page compares against the
# TYPES_* lists to pick an extraction strategy.
#
# @return [Object, nil] the stored type, or nil when none was assigned
def type
  instance_variable_get(:@type)
end

#value ⇒ Object

Returns the value of attribute value.



6
7
8
# File 'lib/digger/pattern.rb', line 6

# Reader for the +value+ attribute: the selector, regular expression, header
# name, cookie name or JSON path that the matching methods look up.
#
# @return [Object, nil] the stored value, or nil when none was assigned
def value
  instance_variable_get(:@value)
end

Class Method Details

.wrap(hash) ⇒ Object



27
28
29
# File 'lib/digger/pattern.rb', line 27

# Converts every value of +hash+ into a Pattern, keeping the keys.
#
# Values that already are Pattern instances are reused as-is; anything else
# is passed to Pattern.new.
#
# @param hash [Hash] keys mapped to Pattern instances or constructor arguments
# @return [Hash] a new hash with the same keys and Pattern values
def self.wrap(hash)
  hash.transform_values do |candidate|
    if candidate.is_a?(Pattern)
      candidate
    else
      Pattern.new(candidate)
    end
  end
end

Instance Method Details

#css_match(doc) ⇒ Object



84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/digger/pattern.rb', line 84

# Applies the CSS selector stored in +value+ to +doc+ and post-processes the
# result according to +type+:
#
# * "css_many" - each selected node goes through the handler; returns an array.
# * "css_all"  - the whole selection is handed to the handler in one call.
# * otherwise  - only the first selected node goes through the handler.
#
# When no custom block is configured, single nodes are reduced to their
# stripped text content (nil-safe), while "css_all" returns the selection as is.
#
# @param doc [#css] parsed document (the original note says Nokogiri::HTML::Document)
# @return [Object] whatever the handler returns (an array of results for "css_many")
def css_match(doc)
  selection = doc.css(value)
  stripped_text = proc { |node| node&.content&.strip }

  case type
  when 'css_many'
    handler = safe_block(&stripped_text)
    selection.map { |node| handler.call(node) }
  when 'css_all'
    safe_block.call(selection)
  else
    safe_block(&stripped_text).call(selection.first)
  end
end

#get_body(page) ⇒ Object



59
60
61
# File 'lib/digger/pattern.rb', line 59

# Passes the raw page body through the configured handler.
#
# @param page [#body] the fetched page
# @return [Object] the handler's result (the body itself when no block is set)
def get_body(page)
  handler = safe_block
  handler.call(page.body)
end


#get_cookie(page) ⇒ Object



72
73
74
75
# File 'lib/digger/pattern.rb', line 72

# Looks up the cookie whose name equals +value+ and passes its value through
# the configured handler. The handler receives nil when no cookie matches.
#
# @param page [#cookies] the fetched page; each cookie responds to #name and #value
# @return [Object] the handler's result
def get_cookie(page)
  matching = page.cookies.find { |candidate| candidate.name == value }
  cookie_value = matching&.value
  safe_block.call(cookie_value)
end

#get_header(page) ⇒ Object



54
55
56
57
# File 'lib/digger/pattern.rb', line 54

# Reads the header named by +value+ (looked up by its lower-cased string form)
# and passes the first of its values through the configured handler. The
# handler receives nil when the header is absent.
#
# @param page [#headers] the fetched page; headers map names to lists of values
# @return [Object] the handler's result
def get_header(page)
  headers = page.headers
  values = headers[value.to_s.downcase]
  first_value = values ? values.first : nil
  safe_block.call(first_value)
end

#get_lines(page) ⇒ Object



67
68
69
70
# File 'lib/digger/pattern.rb', line 67

# Splits the page body on newlines, strips each line, drops the empty ones and
# passes every remaining line through the configured handler.
#
# @param page [#body] the fetched page
# @return [Array] one handler result per non-blank line
def get_lines(page)
  handler = safe_block
  stripped = page.body.split("\n").map(&:strip)
  stripped.reject(&:empty?).map { |line| handler.call(line) }
end

#get_plain(page) ⇒ Object



63
64
65
# File 'lib/digger/pattern.rb', line 63

# Passes the text of the parsed document through the configured handler. The
# handler receives nil when the page has no parsed document.
#
# @param page [#doc] the fetched page
# @return [Object] the handler's result
def get_plain(page)
  handler = safe_block
  plain_text = page.doc&.text
  handler.call(plain_text)
end

#json_match(page) ⇒ Object



77
78
79
80
81
82
# File 'lib/digger/pattern.rb', line 77

# Extracts a value from the page's JSON payload.
#
# The payload is obtained by sending +type+ to the page as a method name,
# +value+ is turned into a key list by json_index_keys, json_fetch resolves
# those keys against the payload, and the result goes through the configured
# handler.
#
# @param page [Object] the fetched page; must respond to the method named by +type+
# @return [Object] the handler's result
def json_match(page)
  found = json_fetch(page.send(type), json_index_keys(value))
  safe_block.call(found)
end

#match_page(page) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/digger/pattern.rb', line 40

# Runs this pattern against a fetched page, choosing the strategy from +type+:
# regular-expression types match the body, CSS types query the parsed
# document, JSON types go through json_match, and the remaining types call
# the get_<type> method of the same name.
#
# @param page [Object] the fetched page
# @return [Object, nil] the extracted data; nil when the page was not
#   successful or +type+ is in none of the TYPES_* lists
def match_page(page)
  return unless page.success?

  case type
  when *TYPES_REGEXP then regexp_match(page.body)
  when *TYPES_CSS then css_match(page.doc)
  when *TYPES_JSON then json_match(page)
  when *TYPES_OTHER then send("get_#{type}", page)
  end
end

#regexp_match(body) ⇒ Object



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/digger/pattern.rb', line 99

# Applies the regular expression stored in +value+ to +body+ and
# post-processes the result according to +type+:
#
# * "match_many" - every full match goes through the handler (default: strip);
#   returns an array.
# * "match_all"  - the array of all full matches is handed to the handler in
#   one call (default: returned unchanged).
# * "match_N"    - the first match is taken and capture group N (the position
#   of +type+ in TYPES_REGEXP) goes through the handler (default: nil-safe strip).
#
# @param body [String] the text to search
# @return [Object, nil] the handler's result; nil for "match_N" when nothing
#   matches, or when group N is empty and no custom block is configured
def regexp_match(body)
  if %w[match_many match_all].include?(type)
    regexp = value.is_a?(Regexp) ? value : Regexp.new(value.to_s)
    # gsub without a block enumerates full matches only, so capture groups
    # never change the shape of the result.
    matches = body.gsub(regexp).to_a
    if type == 'match_many'
      handler = safe_block(&:strip)
      matches.map { |text| handler.call(text) }
    else
      safe_block.call(matches)
    end
  else
    index = TYPES_REGEXP.index(type)
    match_data = body.match(value)
    # Group N is nil when it is optional, did not take part in the match, or
    # does not exist in the pattern. The default handler must tolerate that
    # instead of raising NoMethodError on nil.strip.
    handler = safe_block { |text| text&.strip }
    handler.call(match_data[index]) unless match_data.nil?
  end
end

#safe_block(&default_block) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/digger/pattern.rb', line 14

# Resolves the post-processing callable for this pattern.
#
# * No block configured (nil or a blank String): returns +default_block+ when
#   one was given, otherwise an identity lambda.
# * Callable block: returned unchanged.
# * Anything else (in practice a String of Ruby source): evaluated with +eval+
#   and the result returned. Callers invoke #call on what comes back, so the
#   source is expected to evaluate to a callable.
#
# @param default_block [Proc, nil] fallback used when no block is configured
# @return [#call] the callable to run on extracted data
def safe_block(&default_block)
  if block.nil? || (block.is_a?(String) && block.strip.empty?)
    default_block || ->(v) { v }
  elsif block.respond_to?(:call)
    block
  else
    # SECURITY: this runs arbitrary Ruby with full privileges. The former
    # `$SAFE = 2` guard sandboxed nothing: it raises ArgumentError on Ruby
    # 2.3-2.7 and is only a plain global assignment on Ruby 3.0+, so it was
    # removed. Never pass pattern source that comes from an untrusted origin.
    eval(block)
  end
end