Class: TextExtractor::Record
- Inherits:
-
Object
- Object
- TextExtractor::Record
- Defined in:
- lib/text_extractor/record.rb
Defined Under Namespace
Classes: FactoryAnalyzer
Instance Attribute Summary collapse
-
#factory ⇒ Object
readonly
Returns the value of attribute factory.
-
#regexp ⇒ Object
readonly
Returns the value of attribute regexp.
-
#values ⇒ Object
readonly
Returns the value of attribute values.
Instance Method Summary collapse
- #build_extraction(extracted) ⇒ Object
- #build_regexp(regexp, directives, strip) ⇒ Object
- #expand_regexp(regexp, directives) ⇒ Object
- #extract_fills(fill) ⇒ Object
- #extract_values(match) ⇒ Object
-
#extraction(match, fill) ⇒ Object
Returns a one-element Array containing the built extraction.
- #ignore_regexp(regexp, strip) ⇒ Object
-
#initialize(regexp, factory: nil, values: [], fill: [], directives: true, inline: [], extractor_values: {}, strip: nil, **_kwargs) ⇒ Record
constructor
rubocop: disable Metrics/ParameterLists.
- #initialize_inline_values(inline_values) ⇒ Object
- #match(string, pos = 0) ⇒ Object
- #options ⇒ Object
- #regexp_line_ignorer(strip) ⇒ Object
- #regexp_line_stripper(strip) ⇒ Object
- #source ⇒ Object
- #strip_regexp(regexp, strip) ⇒ Object
Constructor Details
#initialize(regexp, factory: nil, values: [], fill: [], directives: true, inline: [], extractor_values: {}, strip: nil, **_kwargs) ⇒ Record
rubocop: disable Metrics/ParameterLists
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/text_extractor/record.rb', line 8 def initialize( regexp, factory: nil, values: [], fill: [], directives: true, inline: [], extractor_values: {}, strip: nil, **_kwargs ) @factory = factory @constructor = FactoryAnalyzer.new(factory).to_proc @extractor_values = extractor_values @values = values.map { |val| [val.id, val] }.to_h initialize_inline_values(inline) @default_values = values.map { |val| [val.id, nil] }.to_h @regexp = build_regexp(regexp, directives, strip) @fill = Array(fill) end |
Instance Attribute Details
#factory ⇒ Object (readonly)
Returns the value of attribute factory.
5 6 7 |
# File 'lib/text_extractor/record.rb', line 5 def factory @factory end |
#regexp ⇒ Object (readonly)
Returns the value of attribute regexp.
5 6 7 |
# File 'lib/text_extractor/record.rb', line 5 def regexp @regexp end |
#values ⇒ Object (readonly)
Returns the value of attribute values.
5 6 7 |
# File 'lib/text_extractor/record.rb', line 5 def values @values end |
Instance Method Details
#build_extraction(extracted) ⇒ Object
38 39 40 41 |
# File 'lib/text_extractor/record.rb', line 38 def build_extraction(extracted) return extracted unless @constructor @constructor.call(extracted) end |
#build_regexp(regexp, directives, strip) ⇒ Object
43 44 45 46 47 |
# File 'lib/text_extractor/record.rb', line 43 def build_regexp(regexp, directives, strip) stripped = strip_regexp(regexp, strip) expanded = expand_regexp(stripped, directives) ignore_regexp(expanded, strip) end |
#expand_regexp(regexp, directives) ⇒ Object
70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/text_extractor/record.rb', line 70 def expand_regexp(regexp, directives) if directives expander = Directives.new(regexp) expanded = expander.expand expander.values.each do |value| values[value.id] = @extractor_values.fetch(value.id, value) end expanded else regexp end end |
#extract_fills(fill) ⇒ Object
110 111 112 |
# File 'lib/text_extractor/record.rb', line 110 def extract_fills(fill) @fill.zip(fill.values_at(*@fill)).to_h end |
#extract_values(match) ⇒ Object
114 115 116 |
# File 'lib/text_extractor/record.rb', line 114 def extract_values(match) values.keys.map { |id| [id, values[id].convert(match[id])] }.to_h end |
#extraction(match, fill) ⇒ Object
Returns a one-element Array containing the built extraction.
31 32 33 34 35 36 |
# File 'lib/text_extractor/record.rb', line 31 def extraction(match, fill) extracted = {}.merge!(@default_values) .merge!(extract_fills(fill)) .merge!(extract_values(match)) [build_extraction(extracted)] end |
#ignore_regexp(regexp, strip) ⇒ Object
83 84 85 86 87 |
# File 'lib/text_extractor/record.rb', line 83 def ignore_regexp(regexp, strip) return regexp unless strip lines = regexp.source.split("\n").map(&regexp_line_ignorer(strip)) Regexp.new(lines.join("\n"), regexp.options) end |
#initialize_inline_values(inline_values) ⇒ Object
118 119 120 121 122 123 |
# File 'lib/text_extractor/record.rb', line 118 def initialize_inline_values(inline_values) inline_values.each do |value| @values[value] = @extractor_values .fetch(value) { InlineValue.new(value) } end end |
#match(string, pos = 0) ⇒ Object
98 99 100 |
# File 'lib/text_extractor/record.rb', line 98 def match(string, pos = 0) @regexp.match(string, pos) end |
#options ⇒ Object
106 107 108 |
# File 'lib/text_extractor/record.rb', line 106 def options @regexp.options end |
#regexp_line_ignorer(strip) ⇒ Object
89 90 91 92 93 94 95 96 |
# File 'lib/text_extractor/record.rb', line 89 def regexp_line_ignorer(strip) case strip when :left then ->(s) { "\[ \\t\\r\\f]*#{s}" } when :right then ->(s) { "#{s}\[ \\t\\r\\f]*" } when :both then ->(s) { "\[ \\t\\r\\f]*#{s}\[ \\t\\r\\f]*" } else raise "Unknown ignore whitespace option: #{strip}" end end |
#regexp_line_stripper(strip) ⇒ Object
60 61 62 63 64 65 66 67 68 |
# File 'lib/text_extractor/record.rb', line 60 def regexp_line_stripper(strip) case strip when :left then ->(s) { s.lstrip } when :right then ->(s) { s.rstrip } when :both then ->(s) { s.strip } when nil, false then ->(s) { s } else raise "Unknown strip option: #{strip}" end end |
#source ⇒ Object
102 103 104 |
# File 'lib/text_extractor/record.rb', line 102 def source @regexp.source end |
#strip_regexp(regexp, strip) ⇒ Object
49 50 51 52 53 54 55 56 57 58 |
# File 'lib/text_extractor/record.rb', line 49 def strip_regexp(regexp, strip) lines = regexp.source.split("\n") prefix = lines.last if lines.first =~ /\A\s*\z/ && prefix =~ /\A\s*\z/ lines.shift lines = lines.map { |s| s.gsub(prefix, '') } lines = lines.map(&regexp_line_stripper(strip)) end Regexp.new(lines.join("\n"), regexp.options) end |