Class: TextExtractor::Record

Inherits:
Object
  • Object
show all
Defined in:
lib/text_extractor/record.rb

Direct Known Subclasses

Filldown, Guard, Skip

Defined Under Namespace

Classes: FactoryAnalyzer

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(regexp, factory: nil, values: [], fill: [], directives: true, inline: [], extractor_values: {}, **_kwargs) ⇒ Record

Returns a new instance of Record.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/text_extractor/record.rb', line 7

# Builds a record around +regexp+.
#
# @param regexp [Regexp] pattern; it is passed through build_regexp
#   before being stored
# @param factory [Object, nil] stored as-is and also handed to
#   FactoryAnalyzer to derive the constructor proc
# @param values [Array<#id>] value objects, indexed by their +id+
# @param fill [Object] fill keys; coerced with Array()
# @param directives [Boolean] forwarded to build_regexp
# @param inline [Array] inline value names, registered through
#   initialize_inline_values
# @param extractor_values [Hash] lookup consulted before a default
#   value object is used for an inline or directive value
# @param _kwargs [Hash] any other keywords are accepted and ignored
# @raise [EmptyRecordError] propagated from build_regexp
def initialize(
  regexp,
  factory: nil,
  values: [],
  fill: [],
  directives: true,
  inline: [],
  extractor_values: {},
  **_kwargs
)
  @factory = factory
  @constructor = FactoryAnalyzer.new(factory).to_proc
  @extractor_values = extractor_values
  @fill = Array(fill)

  # Index the explicit values and give each of them a nil default.
  @values = {}
  @default_values = {}
  values.each do |value|
    @values[value.id] = value
    @default_values[value.id] = nil
  end

  initialize_inline_values(inline)
  # build_regexp may register further values, so it runs after @values
  # and @extractor_values are in place.
  @regexp = build_regexp(regexp, directives)
end

Instance Attribute Details

#factory ⇒ Object (readonly)

Returns the value of attribute factory.



5
6
7
# File 'lib/text_extractor/record.rb', line 5

# Reader for @factory, which the constructor sets from its +factory+
# keyword (nil by default).
#
# @return [Object, nil] the stored factory
def factory
  @factory
end

#regexp ⇒ Object (readonly)

Returns the value of attribute regexp.



5
6
7
# File 'lib/text_extractor/record.rb', line 5

# Reader for @regexp, which the constructor sets to the result of
# build_regexp.
#
# @return [Object] the stored pattern (presumably a Regexp; the
#   directive-expanded form comes from Directives#expand — verify there)
def regexp
  @regexp
end

#values ⇒ Object (readonly)

Returns the value of attribute values.



5
6
7
# File 'lib/text_extractor/record.rb', line 5

# Reader for @values: a hash of value objects keyed by id. It holds the
# constructor's +values+, plus the inline values and any values added
# while expanding directives.
#
# @return [Hash] id => value object
def values
  @values
end

Instance Method Details

#build_extraction(extracted) ⇒ Object



35
36
37
38
39
# File 'lib/text_extractor/record.rb', line 35

# Turns the extracted hash into the final extraction object.
#
# @param extracted [Hash] merged defaults, fills and converted values
# @return [Object] the result of calling @constructor with +extracted+,
#   or +extracted+ itself when no constructor is set
def build_extraction(extracted)
  @constructor ? @constructor.call(extracted) : extracted
end

#build_regexp(regexp, directives) ⇒ Object

Raises:

  • EmptyRecordError — when the final regexp matches the empty string



41
42
43
44
45
46
47
48
# File 'lib/text_extractor/record.rb', line 41

# Prepares the pattern this record matches with: strips it, then expands
# it according to +directives+.
#
# @param regexp [Regexp] the pattern as given to the constructor
# @param directives [Boolean] forwarded to expand_regexp
# @return [Object] the stripped and expanded pattern
# @raise [EmptyRecordError] if the resulting pattern matches ''
def build_regexp(regexp, directives)
  prepared = expand_regexp(strip_regexp(regexp), directives)
  # A pattern that matches the empty string is rejected outright.
  return prepared unless prepared =~ ''

  raise EmptyRecordError, 'Empty record detected'
end

#expand_regexp(regexp, directives) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/text_extractor/record.rb', line 60

# Expands directives in +regexp+ when +directives+ is truthy.
#
# Each value reported by the expander is registered in +values+ under
# its id; an entry in @extractor_values with the same id takes
# precedence over the expander's own value object.
#
# @param regexp [Regexp] the stripped pattern
# @param directives [Boolean] whether to run directive expansion
# @return [Object] the result of Directives#expand, or +regexp+
#   untouched when directives are disabled
def expand_regexp(regexp, directives)
  return regexp unless directives

  expander = Directives.new(regexp)
  # Expand first, then read the expander's values.
  result = expander.expand
  expander.values.each do |found|
    values[found.id] = @extractor_values.fetch(found.id, found)
  end
  result
end

#extract_fills(fill) ⇒ Object



85
86
87
# File 'lib/text_extractor/record.rb', line 85

# Picks this record's fill keys out of +fill+.
#
# @param fill [#values_at] current fill values, looked up with
#   values_at(*@fill)
# @return [Hash] each key in @fill mapped to the value found for it
def extract_fills(fill)
  carried = fill.values_at(*@fill)
  @fill.each_with_index.map { |key, idx| [key, carried[idx]] }.to_h
end

#extract_values(match) ⇒ Object



89
90
91
# File 'lib/text_extractor/record.rb', line 89

# Converts the captures of +match+ for every registered value.
#
# @param match [#[]] match result, indexed by value id
# @return [Hash] id => result of that value's +convert+ on match[id]
def extract_values(match)
  registry = values
  registry.keys.each_with_object({}) do |id, converted|
    converted[id] = registry[id].convert(match[id])
  end
end

#extraction(match, fill) ⇒ Object

Returns Array.

Returns:

  • Array



28
29
30
31
32
33
# File 'lib/text_extractor/record.rb', line 28

# Builds the extraction for one match.
#
# Three layers are merged into a fresh hash, later layers winning:
# the nil defaults, the fill values, then the converted match values.
#
# @param match [#[]] match result for this record
# @param fill [#values_at] current fill values
# @return [Array] one-element array holding the built extraction
def extraction(match, fill)
  merged = {}
  layers = [@default_values, extract_fills(fill), extract_values(match)]
  layers.each { |layer| merged.update(layer) }
  [build_extraction(merged)]
end

#initialize_inline_values(inline_values) ⇒ Object



93
94
95
96
97
98
# File 'lib/text_extractor/record.rb', line 93

# Registers every inline value name in @values.
#
# An entry in @extractor_values under the same name is used when
# present; otherwise a new InlineValue is created for the name.
#
# @param inline_values [Enumerable] inline value names
# @return [Object] +inline_values+, as returned by +each+
def initialize_inline_values(inline_values)
  inline_values.each do |name|
    resolved = @extractor_values.fetch(name) { InlineValue.new(name) }
    @values[name] = resolved
  end
end

#match(string, pos = 0) ⇒ Object



73
74
75
# File 'lib/text_extractor/record.rb', line 73

# Delegates to @regexp.match, starting the search at +pos+.
#
# @param string [String] text to match against
# @param pos [Integer] start position passed through to @regexp.match
#   (defaults to 0)
# @return [Object] whatever @regexp.match returns (for a Regexp,
#   MatchData or nil)
def match(string, pos = 0)
  @regexp.match(string, pos)
end

#options ⇒ Object



81
82
83
# File 'lib/text_extractor/record.rb', line 81

# Delegates to @regexp.options.
#
# @return [Object] the options of the stored pattern (for a Regexp, an
#   Integer bit set)
def options
  @regexp.options
end

#source ⇒ Object



77
78
79
# File 'lib/text_extractor/record.rb', line 77

# Delegates to @regexp.source.
#
# @return [Object] the source of the stored pattern (for a Regexp, a
#   String)
def source
  @regexp.source
end

#strip_regexp(regexp) ⇒ Object



50
51
52
53
54
55
56
57
58
# File 'lib/text_extractor/record.rb', line 50

# Removes the layout scaffolding from a multi-line regexp.
#
# When the first and the last line of the source are both
# whitespace-only, the first line is dropped and the last line is taken
# as the indentation, which is removed from the start of each remaining
# line. Otherwise the lines are rejoined without stripping.
#
# @param regexp [Regexp] pattern whose source may be indented
# @return [Regexp] a new regexp built with the same options
def strip_regexp(regexp)
  lines = regexp.source.split("\n")
  indent = lines.last
  blank = /\A\s*\z/
  if lines.first =~ blank && indent =~ blank
    lines.shift
    # Remove the indentation only where it leads the line. A plain
    # gsub(indent, '') would also delete matching whitespace runs inside
    # the pattern and change what it matches.
    lines = lines.map do |line|
      line.start_with?(indent) ? line[indent.length..-1] : line
    end
  end
  Regexp.new(lines.join("\n"), regexp.options)
end