Class: Digger::Pattern
- Inherits:
-
Object
- Object
- Digger::Pattern
- Defined in:
- lib/digger/pattern.rb
Overview
Extractor patterns definition
Constant Summary collapse
- MATCH_MAX =
3
- TYPES_REGEXP =
0.upto(MATCH_MAX).map { |i| "match_#{i}" } + %w[match_many match_all]
- TYPES_CSS =
%w[css_one css_many css_all].freeze
- TYPES_JSON =
%w[json jsonp].freeze
- TYPES_OTHER =
%w[cookie plain lines header body].freeze
- TYPES =
TYPES_REGEXP + TYPES_CSS + TYPES_JSON + TYPES_OTHER
Instance Attribute Summary collapse
-
#block ⇒ Object
Returns the value of attribute block.
-
#type ⇒ Object
Returns the value of attribute type.
-
#value ⇒ Object
Returns the value of attribute value.
Class Method Summary collapse
Instance Method Summary collapse
- #css_match(doc) ⇒ Object
- #get_body(page) ⇒ Object
- #get_cookie(page) ⇒ Object
- #get_header(page) ⇒ Object
- #get_lines(page) ⇒ Object
- #get_plain(page) ⇒ Object
-
#initialize(hash = {}) ⇒ Pattern
constructor
A new instance of Pattern.
- #json_match(page) ⇒ Object
- #match_page(page) ⇒ Object
- #regexp_match(body) ⇒ Object
- #safe_block(&default_block) ⇒ Object
Constructor Details
#initialize(hash = {}) ⇒ Pattern
Returns a new instance of Pattern.
8 9 10 11 12 |
# File 'lib/digger/pattern.rb', line 8
# Builds a pattern from a hash of settings.
#
# Only the keys +type+, +value+ and +block+ (compared by their string form)
# are assigned, each through its writer method; every other key is ignored.
#
# @param hash [Hash] settings to apply
def initialize(hash = {})
  hash.each_pair do |name, setting|
    next unless %w[type value block].include?(name.to_s)

    send("#{name}=", setting)
  end
end
Instance Attribute Details
#block ⇒ Object
Returns the value of attribute block.
6 7 8 |
# File 'lib/digger/pattern.rb', line 6 def block @block end |
#type ⇒ Object
Returns the value of attribute type.
6 7 8 |
# File 'lib/digger/pattern.rb', line 6 def type @type end |
#value ⇒ Object
Returns the value of attribute value.
6 7 8 |
# File 'lib/digger/pattern.rb', line 6 def value @value end |
Class Method Details
Instance Method Details
#css_match(doc) ⇒ Object
84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/digger/pattern.rb', line 84
# Runs the CSS selector stored in +value+ against +doc+ and hands the result
# to the handler returned by +safe_block+.
#
# - +css_all+: the whole selection is passed to the handler (identity default).
# - +css_many+: every node is passed to the handler individually.
# - any other type: only the first node is passed.
#
# For the per-node cases the default handler returns the node's stripped
# content, tolerating a nil node.
#
# @param doc [#css] parsed document (a Nokogiri::HTML::Document per the
#   original source comment)
# @return [Object] handler result, or an array of results for +css_many+
def css_match(doc)
  nodes = doc.css(value)
  return safe_block.call(nodes) if type == 'css_all'

  handler = safe_block { |node| node&.content&.strip }
  return nodes.map { |node| handler.call(node) } if type == 'css_many'

  handler.call(nodes.first)
end
#get_body(page) ⇒ Object
59 60 61 |
# File 'lib/digger/pattern.rb', line 59
# Passes the raw page body to the handler returned by +safe_block+.
#
# @param page [#body] fetched page
# @return [Object] whatever the handler returns for the body
def get_body(page)
  handler = safe_block
  handler.call(page.body)
end
#get_cookie(page) ⇒ Object
72 73 74 75 |
# File 'lib/digger/pattern.rb', line 72
# Looks up the cookie whose name equals +value+ and passes its value to the
# handler returned by +safe_block+.
#
# @param page [#cookies] fetched page; each cookie must respond to +name+
#   and +value+
# @return [Object] handler result; the handler receives nil when no cookie
#   with that name exists
def get_cookie(page)
  cookie = page.cookies.find { |c| c.name == value }&.value
  safe_block.call(cookie)
end
#get_header(page) ⇒ Object
54 55 56 57 |
# File 'lib/digger/pattern.rb', line 54
# Reads the response header named by +value+ (looked up lower-cased) and
# passes its first entry to the handler returned by +safe_block+.
#
# @param page [#headers] fetched page
# @return [Object] handler result; the handler receives nil when the header
#   is absent
def get_header(page)
  header_name = value.to_s.downcase
  entries = page.headers[header_name] || []
  safe_block.call(entries.first)
end
#get_lines(page) ⇒ Object
67 68 69 70 |
# File 'lib/digger/pattern.rb', line 67
# Splits the page body on newlines, strips each line, drops the empty ones
# and passes every remaining line to the handler returned by +safe_block+.
#
# @param page [#body] fetched page
# @return [Array] one handler result per non-empty line
def get_lines(page)
  handler = safe_block
  stripped = page.body.split("\n").map(&:strip)
  stripped.reject(&:empty?).map { |text| handler.call(text) }
end
#get_plain(page) ⇒ Object
63 64 65 |
# File 'lib/digger/pattern.rb', line 63
# Passes the text of the parsed document to the handler returned by
# +safe_block+.
#
# @param page [#doc] fetched page
# @return [Object] handler result; the handler receives nil when the page
#   has no parsed document
def get_plain(page)
  handler = safe_block
  document = page.doc
  handler.call(document&.text)
end
#json_match(page) ⇒ Object
77 78 79 80 81 82 |
# File 'lib/digger/pattern.rb', line 77
# Obtains the page's decoded payload by calling the page method named after
# +type+, fetches the entry addressed by +value+ and passes it to the handler
# returned by +safe_block+.
#
# @param page [Object] fetched page responding to the method named by +type+
# @return [Object] handler result for the fetched entry
def json_match(page)
  payload = page.send(type)
  found = json_fetch(payload, json_index_keys(value))
  safe_block.call(found)
end
#match_page(page) ⇒ Object
40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/digger/pattern.rb', line 40
# Applies this pattern to a page, dispatching on +type+.
#
# Regexp types search the body, CSS types query the parsed document, JSON
# types go through +json_match+, and the remaining known types call the
# matching +get_<type>+ method.
#
# @param page [Object] fetched page
# @return [Object, nil] extraction result; nil when the page was not
#   successful or +type+ is not one of the known types
def match_page(page)
  return unless page.success?
  return regexp_match(page.body) if TYPES_REGEXP.include?(type)
  return css_match(page.doc) if TYPES_CSS.include?(type)
  return json_match(page) if TYPES_JSON.include?(type)

  send("get_#{type}", page) if TYPES_OTHER.include?(type)
end
#regexp_match(body) ⇒ Object
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
# File 'lib/digger/pattern.rb', line 99
# Applies the regular expression stored in +value+ to +body+.
#
# - +match_many+: every whole match is passed to the handler individually
#   (default handler: +strip+).
# - +match_all+: the array of whole matches is passed to the handler at once
#   (default handler: identity).
# - +match_N+: capture group N of the first match (0 is the whole match) is
#   passed to the handler (default handler: nil-safe +strip+).
#
# @param body [String] text to search
# @return [Object, nil] handler result(s); nil when a +match_N+ pattern does
#   not match at all
def regexp_match(body)
  if %w[match_many match_all].include?(type)
    regexp = value.is_a?(Regexp) ? value : Regexp.new(value.to_s)
    # gsub without a block enumerates whole matches, even if the pattern has groups
    matches = body.gsub(regexp).to_a
    if type == 'match_many'
      handler = safe_block(&:strip)
      matches.map { |text| handler.call(text) }
    else
      safe_block.call(matches)
    end
  else
    index = TYPES_REGEXP.index(type)
    matches = body.match(value)
    # A group that did not participate (or does not exist) is nil, so the
    # default handler must not call strip on it unconditionally.
    handler = safe_block { |text| text&.strip }
    handler.call(matches[index]) unless matches.nil?
  end
end
#safe_block(&default_block) ⇒ Object
14 15 16 17 18 19 20 21 22 23 24 25 |
# File 'lib/digger/pattern.rb', line 14
# Resolves the handler used to post-process an extracted value.
#
# - When +block+ is nil or a blank String, the given default block is
#   returned, or an identity lambda if no default was given.
# - When +block+ responds to +call+, it is returned unchanged.
# - Otherwise +block+ is evaluated as Ruby source and the result is returned;
#   that source is expected to produce a callable.
#
# @yield [value] default handler used when no custom block is configured
# @return [#call] the handler to invoke with the extracted value
def safe_block(&default_block)
  if block.nil? || (block.is_a?(String) && block.strip.empty?)
    default_block || ->(v) { v }
  elsif block.respond_to?(:call)
    block
  else
    # SECURITY: this evaluates arbitrary Ruby source with full privileges.
    # The former `$SAFE = 2` guard is gone on purpose: levels 2-4 raise
    # ArgumentError on Ruby 2.3-2.7, and from Ruby 3.0 $SAFE is an ordinary
    # global that sandboxes nothing. Only configure string blocks from
    # trusted sources.
    eval(block)
  end
end