Class: TextExtractor::Record
- Inherits:
-
Object
- Object
- TextExtractor::Record
- Defined in:
- lib/text_extractor/record.rb
Defined Under Namespace
Classes: FactoryAnalyzer
Instance Attribute Summary collapse
-
#factory ⇒ Object
readonly
Returns the value of attribute factory.
-
#regexp ⇒ Object
readonly
Returns the value of attribute regexp.
-
#values ⇒ Object
readonly
Returns the value of attribute values.
Instance Method Summary collapse
- #build_extraction(extracted) ⇒ Object
- #build_regexp(regexp, directives) ⇒ Object
- #expand_regexp(regexp, directives) ⇒ Object
- #extract_fills(fill) ⇒ Object
- #extract_values(match) ⇒ Object
-
#extraction(match, fill) ⇒ Object
Returns a one-element Array wrapping the built extraction.
-
#initialize(regexp, factory: nil, values: [], fill: [], directives: true, inline: [], extractor_values: {}, **_kwargs) ⇒ Record
constructor
A new instance of Record.
- #initialize_inline_values(inline_values) ⇒ Object
- #match(string, pos = 0) ⇒ Object
- #options ⇒ Object
- #source ⇒ Object
- #strip_regexp(regexp) ⇒ Object
Constructor Details
#initialize(regexp, factory: nil, values: [], fill: [], directives: true, inline: [], extractor_values: {}, **_kwargs) ⇒ Record
Returns a new instance of Record.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
# File 'lib/text_extractor/record.rb', line 7

# Sets up a record: stores the factory, derives the constructor proc from
# it, indexes the given values by id, registers inline values, and builds
# the final regexp.
#
# @param regexp [Object] pattern handed to #build_regexp
# @param factory [Object, nil] passed to FactoryAnalyzer to obtain the constructor
# @param values [Enumerable] value objects; each must respond to #id
# @param fill [Object] key or list of keys, coerced with Array()
# @param directives [Object] forwarded unchanged to #build_regexp
# @param inline [Enumerable] ids forwarded to #initialize_inline_values
# @param extractor_values [Hash] lookup consulted for inline values
# @param _kwargs [Hash] extra keyword arguments are accepted and ignored
def initialize(regexp, factory: nil, values: [], fill: [], directives: true,
               inline: [], extractor_values: {}, **_kwargs)
  @factory = factory
  @constructor = FactoryAnalyzer.new(factory).to_proc
  @extractor_values = extractor_values

  # Index the declared values by id before inline values are added on top.
  @values = values.each_with_object({}) { |val, by_id| by_id[val.id] = val }
  initialize_inline_values(inline)

  # Every declared (non-inline) value starts out as nil in an extraction.
  @default_values = values.each_with_object({}) { |val, blanks| blanks[val.id] = nil }

  @regexp = build_regexp(regexp, directives)
  @fill = Array(fill)
end
Instance Attribute Details
#factory ⇒ Object (readonly)
Returns the value of attribute factory.
5 6 7 |
# File 'lib/text_extractor/record.rb', line 5

# Read-only accessor for the factory given to the constructor.
#
# @return [Object] the value of the @factory instance variable
def factory
  @factory
end
#regexp ⇒ Object (readonly)
Returns the value of attribute regexp.
5 6 7 |
# File 'lib/text_extractor/record.rb', line 5

# Read-only accessor for the regexp built during initialization.
#
# @return [Object] the value of the @regexp instance variable
def regexp
  @regexp
end
#values ⇒ Object (readonly)
Returns the value of attribute values.
5 6 7 |
# File 'lib/text_extractor/record.rb', line 5

# Read-only accessor for the record's values, keyed by id.
#
# @return [Object] the value of the @values instance variable
def values
  @values
end
Instance Method Details
#build_extraction(extracted) ⇒ Object
35 36 37 38 39 |
# File 'lib/text_extractor/record.rb', line 35

# Turns the extracted hash into the final result object.
#
# @param extracted [Object] the merged extraction data
# @return [Object] the result of calling @constructor with +extracted+, or
#   +extracted+ itself when no constructor is set
def build_extraction(extracted)
  if @constructor
    @constructor.call(extracted)
  else
    extracted
  end
end
#build_regexp(regexp, directives) ⇒ Object
41 42 43 44 45 46 47 48 |
# File 'lib/text_extractor/record.rb', line 41

# Produces the regexp used for matching: strips the surrounding
# indentation, expands directives, and rejects patterns that can match
# the empty string.
#
# @param regexp [Object] pattern passed to #strip_regexp
# @param directives [Object] forwarded to #expand_regexp
# @return [Object] the stripped and expanded regexp
# @raise [EmptyRecordError] when the final regexp matches the empty string
def build_regexp(regexp, directives)
  stripped = strip_regexp(regexp)
  final = expand_regexp(stripped, directives)
  raise EmptyRecordError, 'Empty record detected' if final =~ ''

  final
end
#expand_regexp(regexp, directives) ⇒ Object
60 61 62 63 64 65 66 67 68 69 70 71 |
# File 'lib/text_extractor/record.rb', line 60

# Optionally runs the regexp through Directives and registers the values
# that Directives reports.
#
# NOTE(review): this listing had lost several identifiers (the method name,
# two local variables, and the method called on the Directives instance).
# The local names below and the `expand` call are reconstructed — confirm
# against lib/text_extractor/record.rb and the Directives class.
#
# @param regexp [Object] pattern to expand
# @param directives [Object] when falsy, +regexp+ is returned untouched
# @return [Object] the expanded regexp, or +regexp+ when directives are off
def expand_regexp(regexp, directives)
  return regexp unless directives

  expander = Directives.new(regexp)
  expanded = expander.expand
  # A value supplied via @extractor_values wins over the one Directives found.
  expander.values.each do |value|
    values[value.id] = @extractor_values.fetch(value.id, value)
  end
  expanded
end
#extract_fills(fill) ⇒ Object
85 86 87 |
# File 'lib/text_extractor/record.rb', line 85

# Picks this record's fill keys out of the given fill data.
#
# @param fill [#values_at] source of fill data
# @return [Hash] each key in @fill mapped to the corresponding entry of +fill+
def extract_fills(fill)
  picked = fill.values_at(*@fill)
  @fill.zip(picked).to_h
end
#extract_values(match) ⇒ Object
89 90 91 |
# File 'lib/text_extractor/record.rb', line 89

# Converts the captured text for every known value.
#
# @param match [#[]] match data indexed by value id
# @return [Hash] each value id mapped to the result of that value's
#   #convert applied to +match[id]+
def extract_values(match)
  values.each_with_object({}) do |(id, value), converted|
    converted[id] = value.convert(match[id])
  end
end
#extraction(match, fill) ⇒ Object
Returns a one-element Array wrapping the built extraction.
28 29 30 31 32 33 |
# File 'lib/text_extractor/record.rb', line 28

# Assembles the extraction for a match. Layers are applied in order, later
# ones overriding earlier ones: default values, then fills, then the
# values converted from the match.
#
# @param match [Object] match data passed to #extract_values
# @param fill [Object] fill data passed to #extract_fills
# @return [Array] a one-element array holding the result of #build_extraction
def extraction(match, fill)
  extracted = {}
  extracted.update(@default_values)
  extracted.update(extract_fills(fill))
  extracted.update(extract_values(match))
  [build_extraction(extracted)]
end
#initialize_inline_values(inline_values) ⇒ Object
93 94 95 96 97 98 |
# File 'lib/text_extractor/record.rb', line 93

# Registers each inline value id in @values, using the entry from
# @extractor_values when one exists and a new InlineValue otherwise.
#
# @param inline_values [#each] ids of the inline values
# @return [Object] +inline_values+, as returned by #each
def initialize_inline_values(inline_values)
  inline_values.each do |id|
    resolved = @extractor_values.fetch(id) { InlineValue.new(id) }
    @values[id] = resolved
  end
end
#match(string, pos = 0) ⇒ Object
73 74 75 |
# File 'lib/text_extractor/record.rb', line 73

# Matches the record's regexp against a string.
#
# @param string [String] text to search
# @param pos [Integer] offset at which the search starts (defaults to 0)
# @return [Object] whatever @regexp.match returns for these arguments
def match(string, pos = 0)
  @regexp.match(string, pos)
end
#options ⇒ Object
81 82 83 |
# File 'lib/text_extractor/record.rb', line 81

# Delegates to the underlying regexp's options.
#
# @return [Object] the value of @regexp.options
def options
  @regexp.options
end
#source ⇒ Object
77 78 79 |
# File 'lib/text_extractor/record.rb', line 77

# Delegates to the underlying regexp's source.
#
# @return [Object] the value of @regexp.source
def source
  @regexp.source
end
#strip_regexp(regexp) ⇒ Object
50 51 52 53 54 55 56 57 58 |
# File 'lib/text_extractor/record.rb', line 50

# Removes the surrounding indentation from a multi-line regexp.
#
# When the first and last lines of the source are blank, the first line is
# dropped and the last line (the indentation prefix) is stripped from the
# start of every remaining line. Otherwise the source is left as is. The
# regexp's options are carried over to the result.
#
# @param regexp [Regexp] pattern to strip
# @return [Regexp] a new regexp with the same options
def strip_regexp(regexp)
  blank = /\A\s*\z/
  lines = regexp.source.split("\n")
  prefix = lines.last

  # `=~` is used on purpose: for an empty source both first and last are nil.
  if lines.first =~ blank && prefix =~ blank
    lines.shift
    # Strip the prefix only where it leads the line; interior whitespace
    # that happens to equal the prefix is part of the pattern.
    lines = lines.map do |line|
      line.start_with?(prefix) ? line[prefix.length..-1] : line
    end
  end

  Regexp.new(lines.join("\n"), regexp.options)
end