Class: Brief::Document::ContentExtractor::ExtractionRule

Inherits:
Object
  • Object
show all
Defined in:
lib/brief/document/content_extractor.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(rule) ⇒ ExtractionRule

Returns a new instance of ExtractionRule.



52
53
54
55
# File 'lib/brief/document/content_extractor.rb', line 52

# Stores the given rule and caches the value of its +args+ call.
#
# @param rule [#args] rule object; must respond to +args+
def initialize(rule)
  @rule = rule
  @args = @rule.args
end

Instance Attribute Details

#args ⇒ Object (readonly)

Returns the value of attribute args.



50
51
52
# File 'lib/brief/document/content_extractor.rb', line 50

# Reader for the +@args+ instance variable, which the constructor
# assigns from +rule.args+.
#
# @return [Object] the stored +@args+ value
def args
  @args
end

#rule ⇒ Object (readonly)

Returns the value of attribute rule.



50
51
52
# File 'lib/brief/document/content_extractor.rb', line 50

# Reader for the +@rule+ instance variable, i.e. the object that was
# passed to the constructor.
#
# @return [Object] the stored +@rule+ value
def rule
  @rule
end

Instance Method Details

#apply_to(document) ⇒ Object



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/brief/document/content_extractor.rb', line 73

# Runs this rule's selector against +document+ (via +document.css+) and
# converts the matches into a return value.
#
# Result, in order of precedence:
# * +nil+ when nothing matches;
# * when +deserialize?+ is true and +format+ is +:json+ or +:yaml+, the
#   +to_mash+ of the parsed combined text (an empty hash is used instead
#   if reading or parsing the text raises a StandardError);
# * the +text+ of the first match when the selector contains
#   "first-of-type";
# * otherwise the +text+ of every match, collected with +map+.
#
# @param document [#css] object queried with the selector
# @return [Object, nil] see the list above
# @raise [RuntimeError] when +selector+ is nil
def apply_to(document)
  raise 'Must specify a selector' unless selector

  matches = document.css(selector)
  return nil if matches.length.zero?

  if deserialize?
    case format
    when :json
      parsed = begin
        JSON.parse(matches.text.to_s)
      rescue StandardError
        {}
      end
      return parsed.to_mash
    when :yaml
      # NOTE(review): YAML.load runs on text taken from the document; if
      # documents can come from untrusted sources, consider YAML.safe_load.
      parsed = begin
        YAML.load(matches.text.to_s)
      rescue StandardError
        {}
      end
      return parsed.to_mash
    end
  end

  # Matches are known to be non-empty here because of the early return.
  return matches.first.text if selector.match(/first-of-type/)

  matches.map(&:text)
end

#deserialize? ⇒ Boolean

Returns:

  • (Boolean)


61
62
63
# File 'lib/brief/document/content_extractor.rb', line 61

# Whether the +serialize+ option is set, as judged by +present?+.
#
# @return [Boolean]
def deserialize?
  options.serialize.present?
end

#format ⇒ Object



65
66
67
# File 'lib/brief/document/content_extractor.rb', line 65

# The +serialize+ option converted with +to_sym+.
#
# @return [Object] result of calling +to_sym+ on +options.serialize+
def format
  serializer_name = options.serialize
  serializer_name.to_sym
end

#options ⇒ Object



57
58
59
# File 'lib/brief/document/content_extractor.rb', line 57

# The second element of +args+, or the +to_mash+ of an empty hash when
# that element is nil or false.
#
# @return [Object]
def options
  explicit = args[1]
  return explicit if explicit

  {}.to_mash
end

#selector ⇒ Object



69
70
71
# File 'lib/brief/document/content_extractor.rb', line 69

# The first element of +args+ when it is a String; nil otherwise.
#
# @return [String, nil]
def selector
  candidate = args.first
  candidate.is_a?(String) ? candidate : nil
end