Class: TextExtractor::Record
- Inherits:
-
Object
- Object
- TextExtractor::Record
- Defined in:
- lib/text_extractor/record.rb
Direct Known Subclasses
Defined Under Namespace
Classes: FactoryAnalyzer
Instance Attribute Summary collapse
-
#factory ⇒ Object
readonly
Returns the value of attribute factory.
-
#regexp ⇒ Object
readonly
Returns the value of attribute regexp.
-
#values ⇒ Object
readonly
Returns the value of attribute values.
Instance Method Summary collapse
- #build_extraction(extracted) ⇒ Object
- #build_regexp(regexp, directives, strip) ⇒ Object
- #expand_regexp(regexp, directives) ⇒ Object
- #extract_fills(fill) ⇒ Object
- #extract_values(match) ⇒ Object
- #extraction(match, fill) ⇒ Object
- #ignore_regexp(regexp, strip) ⇒ Object
-
#initialize(regexp, factory: nil, values: [], fill: [], directives: true, inline: [], extractor_values: {}, strip: nil) ⇒ Record
constructor
A new instance of Record.
- #initialize_inline_values(inline_values) ⇒ Object
- #match(string, pos = 0) ⇒ Object
- #options ⇒ Object
- #regexp_line_ignorer(strip) ⇒ Object
- #regexp_line_stripper(strip) ⇒ Object
- #source ⇒ Object
- #strip_regexp(regexp, strip) ⇒ Object
Constructor Details
#initialize(regexp, factory: nil, values: [], fill: [], directives: true, inline: [], extractor_values: {}, strip: nil) ⇒ Record
Returns a new instance of Record.
7 8 9 10 11 12 13 14 15 16 17 |
# File 'lib/text_extractor/record.rb', line 7
#
# Builds a record around +regexp+ and the options that drive extraction.
#
# @param regexp [Regexp] pattern handed to #build_regexp
# @param factory [Object, nil] stored as-is and passed to FactoryAnalyzer,
#   whose #to_proc result is used by #build_extraction
# @param values [Array] objects responding to #id; indexed by that id
# @param fill [Object, Array] key(s) later read from the fill hash
#   (coerced with Array())
# @param directives [Boolean] when truthy the regexp is run through
#   Directives by #expand_regexp
# @param inline [Array] ids registered via #initialize_inline_values
# @param extractor_values [Hash] values looked up by id, taking
#   precedence over inline and directive-provided values
# @param strip [Symbol, nil] :left, :right, :both or nil
def initialize(regexp, factory: nil, values: [], fill: [], directives: true,
               inline: [], extractor_values: {}, strip: nil)
  @factory = factory
  @constructor = FactoryAnalyzer.new(factory).to_proc
  @extractor_values = extractor_values
  @values = values.map { |val| [val.id, val] }.to_h
  # Must run after @extractor_values and @values are set: it reads both.
  initialize_inline_values(inline)
  # Defaults cover only the explicitly passed values, each mapped to nil.
  @default_values = values.map { |val| [val.id, nil] }.to_h
  @regexp = build_regexp(regexp, directives, strip)
  @fill = Array(fill)
end
Instance Attribute Details
#factory ⇒ Object (readonly)
Returns the value of attribute factory.
5 6 7 |
# File 'lib/text_extractor/record.rb', line 5
#
# Returns the value of attribute factory.
#
# @return [Object, nil] the object stored in @factory
def factory
  @factory
end
#regexp ⇒ Object (readonly)
Returns the value of attribute regexp.
5 6 7 |
# File 'lib/text_extractor/record.rb', line 5
#
# Returns the value of attribute regexp.
#
# @return [Regexp] the object stored in @regexp
def regexp
  @regexp
end
#values ⇒ Object (readonly)
Returns the value of attribute values.
5 6 7 |
# File 'lib/text_extractor/record.rb', line 5
#
# Returns the value of attribute values.
#
# @return [Hash] the object stored in @values (value objects keyed by id)
def values
  @values
end
Instance Method Details
#build_extraction(extracted) ⇒ Object
26 27 28 29 |
# File 'lib/text_extractor/record.rb', line 26
#
# Turns the extracted hash into the final result.
#
# @param extracted [Hash] merged defaults, fills and converted values
# @return [Object] +extracted+ itself when no constructor is set,
#   otherwise whatever the constructor returns for it
def build_extraction(extracted)
  return extracted unless @constructor

  @constructor.call(extracted)
end
#build_regexp(regexp, directives, strip) ⇒ Object
31 32 33 34 35 |
# File 'lib/text_extractor/record.rb', line 31
#
# Runs the regexp through the three preparation steps, in order:
# strip indentation, expand directives, then add whitespace-ignoring
# wrappers.
#
# @param regexp [Regexp] the raw pattern
# @param directives [Boolean] forwarded to #expand_regexp
# @param strip [Symbol, nil] forwarded to #strip_regexp and #ignore_regexp
# @return [Regexp] the result of #ignore_regexp
def build_regexp(regexp, directives, strip)
  stripped = strip_regexp(regexp, strip)
  expanded = expand_regexp(stripped, directives)
  ignore_regexp(expanded, strip)
end
#expand_regexp(regexp, directives) ⇒ Object
58 59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/text_extractor/record.rb', line 58
#
# Expands directives embedded in +regexp+ and registers the values the
# expansion introduces.
#
# NOTE(review): the identifiers in this listing were lost in extraction.
# The method name comes from the method summary; the local names and the
# `expand` call on Directives are reconstructed — confirm against the
# Directives class.
#
# @param regexp [Regexp] the pattern to expand
# @param directives [Boolean] when falsy the regexp is returned untouched
# @return [Object] the expansion result, or +regexp+ when directives are off
def expand_regexp(regexp, directives)
  return regexp unless directives

  expander = Directives.new(regexp)
  expanded = expander.expand
  # A value supplied through @extractor_values wins over the one the
  # directive expansion produced for the same id.
  expander.values.each do |value|
    values[value.id] = @extractor_values.fetch(value.id, value)
  end
  expanded
end
#extract_fills(fill) ⇒ Object
98 99 100 |
# File 'lib/text_extractor/record.rb', line 98
#
# Picks this record's fill keys out of +fill+.
#
# @param fill [Hash] source of fill values (must respond to #values_at)
# @return [Hash] one entry per key in @fill; keys absent from +fill+
#   map to whatever #values_at yields for them (nil for a plain Hash)
def extract_fills(fill)
  @fill.zip(fill.values_at(*@fill)).to_h
end
#extract_values(match) ⇒ Object
102 103 104 |
# File 'lib/text_extractor/record.rb', line 102
#
# Converts every registered value from its capture in +match+.
#
# @param match [MatchData] indexed by value id (anything responding to #[])
# @return [Hash] value id => result of that value's #convert
def extract_values(match)
  values.map { |id, value| [id, value.convert(match[id])] }.to_h
end
#extraction(match, fill) ⇒ Object
19 20 21 22 23 24 |
# File 'lib/text_extractor/record.rb', line 19
#
# Builds the result for one match.
#
# Later sources override earlier ones: defaults, then fills, then the
# values converted from the match.
#
# @param match [MatchData] passed to #extract_values
# @param fill [Hash] passed to #extract_fills
# @return [Object] the result of #build_extraction
def extraction(match, fill)
  # Non-destructive merges build a fresh hash, so @default_values is
  # never mutated.
  extracted = @default_values
              .merge(extract_fills(fill))
              .merge(extract_values(match))
  build_extraction(extracted)
end
#ignore_regexp(regexp, strip) ⇒ Object
71 72 73 74 75 |
# File 'lib/text_extractor/record.rb', line 71
#
# Wraps every line of the regexp source with the fragment produced by
# #regexp_line_ignorer, keeping the regexp's options.
#
# @param regexp [Regexp] the pattern to rewrite
# @param strip [Symbol, nil] falsy returns +regexp+ unchanged
# @return [Regexp] the same object when +strip+ is falsy, otherwise a new
#   regexp
def ignore_regexp(regexp, strip)
  return regexp unless strip

  lines = regexp.source.split("\n").map(&regexp_line_ignorer(strip))
  Regexp.new(lines.join("\n"), regexp.options)
end
#initialize_inline_values(inline_values) ⇒ Object
106 107 108 109 110 111 |
# File 'lib/text_extractor/record.rb', line 106
#
# Registers each inline value id in @values.
#
# An entry in @extractor_values under the same id is used when present;
# otherwise a new InlineValue is created for the id.
#
# @param inline_values [Array] ids to register
# @return [Array] +inline_values+ (the receiver of #each)
def initialize_inline_values(inline_values)
  inline_values.each do |value|
    @values[value] = @extractor_values.fetch(value) { InlineValue.new(value) }
  end
end
#match(string, pos = 0) ⇒ Object
86 87 88 |
# File 'lib/text_extractor/record.rb', line 86
#
# Matches the record's regexp against +string+.
#
# @param string [String] text to search
# @param pos [Integer] offset at which the search starts
# @return [MatchData, nil] the result of Regexp#match
def match(string, pos = 0)
  @regexp.match(string, pos)
end
#options ⇒ Object
94 95 96 |
# File 'lib/text_extractor/record.rb', line 94
#
# Option bits of the record's regexp.
#
# @return [Integer] the result of Regexp#options on @regexp
def options
  @regexp.options
end
#regexp_line_ignorer(strip) ⇒ Object
77 78 79 80 81 82 83 84 |
# File 'lib/text_extractor/record.rb', line 77
#
# Returns a lambda that surrounds one regexp source line with a pattern
# matching optional horizontal whitespace.
#
# @param strip [Symbol] :left, :right or :both
# @return [Proc] lambda taking the line and returning the wrapped line
# @raise [RuntimeError] for any other +strip+ value
def regexp_line_ignorer(strip)
  # Single-quoted so the backslashes reach the regexp source verbatim.
  blanks = '[ \t\r\f]*'
  case strip
  when :left  then ->(s) { "#{blanks}#{s}" }
  when :right then ->(s) { "#{s}#{blanks}" }
  when :both  then ->(s) { "#{blanks}#{s}#{blanks}" }
  else raise "Unknown ignore whitespace option: #{strip}"
  end
end
#regexp_line_stripper(strip) ⇒ Object
48 49 50 51 52 53 54 55 56 |
# File 'lib/text_extractor/record.rb', line 48
#
# Returns a lambda that trims whitespace from one regexp source line.
#
# @param strip [Symbol, nil, false] :left, :right, :both, or nil/false
#   for an identity lambda
# @return [Proc] lambda taking the line and returning the trimmed line
# @raise [RuntimeError] for any other +strip+ value
def regexp_line_stripper(strip)
  case strip
  when :left      then ->(s) { s.lstrip }
  when :right     then ->(s) { s.rstrip }
  when :both      then ->(s) { s.strip }
  when nil, false then ->(s) { s }
  else raise "Unknown strip option: #{strip}"
  end
end
#source ⇒ Object
90 91 92 |
# File 'lib/text_extractor/record.rb', line 90
#
# Source text of the record's regexp.
#
# @return [String] the result of Regexp#source on @regexp
def source
  @regexp.source
end
#strip_regexp(regexp, strip) ⇒ Object
37 38 39 40 41 42 43 44 45 46 |
# File 'lib/text_extractor/record.rb', line 37
#
# Removes block indentation from a multi-line regexp.
#
# The rewrite only applies when both the first and the last source line
# are whitespace-only. In that case the first line is dropped, the last
# line is taken as the indentation prefix and removed from the start of
# every line, and each line then goes through #regexp_line_stripper.
# Otherwise the regexp is rebuilt unchanged.
#
# @param regexp [Regexp] the pattern to normalise
# @param strip [Symbol, nil, false] forwarded to #regexp_line_stripper
# @return [Regexp] a new regexp with the same options
def strip_regexp(regexp, strip)
  lines = regexp.source.split("\n")
  prefix = lines.last
  if lines.first =~ /\A\s*\z/ && prefix =~ /\A\s*\z/
    lines.shift
    # Remove the prefix only where it leads the line. A global substitution
    # would also eat matching whitespace inside the pattern and flatten
    # deeper indentation.
    lines = lines.map do |line|
      line.start_with?(prefix) ? line[prefix.length..-1] : line
    end
    lines = lines.map(&regexp_line_stripper(strip))
  end
  Regexp.new(lines.join("\n"), regexp.options)
end