Class: TextExtractor

Inherits:
Object
  • Object
show all
Defined in:
lib/text_extractor.rb,
lib/text_extractor/skip.rb,
lib/text_extractor/guard.rb,
lib/text_extractor/value.rb,
lib/text_extractor/record.rb,
lib/text_extractor/version.rb,
lib/text_extractor/filldown.rb,
lib/text_extractor/directives.rb,
lib/text_extractor/extraction.rb,
lib/text_extractor/inline_value.rb,
lib/text_extractor/directives/group.rb,
lib/text_extractor/directives/classes.rb

Overview

Represents an extractor definition.

Defined Under Namespace

Modules: Patterns Classes: Directives, EmptyRecordError, Extraction, Filldown, Guard, GuardError, InlineValue, Record, Skip, State, Value

Constant Summary collapse

# Whitespace-stripping lambdas keyed by the side to strip (:left, :right or
# :both). Each lambda splits its input on "\n", strips every line on the chosen
# side, rejoins the lines with "\n" and appends a single trailing "\n".
STRIP_PROCS = {
  left: :lstrip,
  right: :rstrip,
  both: :strip
}.each_with_object({}) { |(side, stripper), procs|
  procs[side] = ->(s) { s.split("\n").map(&stripper).join("\n") + "\n" }
}.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(&block) ⇒ TextExtractor

Returns a new instance of TextExtractor.



19
20
21
22
23
24
25
26
# File 'lib/text_extractor.rb', line 19

# Builds an extractor from a definition block.
#
# Options and collections are reset first, then the block is evaluated with
# instance_exec so that it runs with this instance as self. Afterwards every
# entry of @append_guards is registered through #guard, passing the entry as
# keyword arguments and its :block element as the block.
#
# @raise [RuntimeError] when no block is given
def initialize(&block)
  raise "#{self.class}.new requires a block" if block.nil?

  initialize_options
  initialize_collections
  instance_exec(&block)
  @append_guards.each do |guard_opts|
    guard(**guard_opts, &guard_opts[:block])
  end
end

Instance Attribute Details

#records ⇒ Object (readonly)

Returns the value of attribute records.



17
18
19
# File 'lib/text_extractor.rb', line 17

# Reader for @records, the array that #initialize_collections creates and
# #record appends to.
#
# @return [Object] the current value of @records
def records
  @records
end

#values ⇒ Object (readonly)

Returns the value of attribute values.



17
18
19
# File 'lib/text_extractor.rb', line 17

# Reader for @values, the hash that #value and #inline populate, keyed by the
# value id.
#
# @return [Object] the current value of @values
def values
  @values
end

Class Method Details

.expand_directives(re) ⇒ Object



7
8
9
# File 'lib/text_extractor/directives.rb', line 7

# Wraps +re+ in a Directives object and returns the result of its #expand.
#
# @param re the object handed to Directives.new
# @return [Object] whatever Directives#expand returns
def self.expand_directives(re)
  directives = Directives.new(re)
  directives.expand
end

.version ⇒ Object



2
3
4
# File 'lib/text_extractor/version.rb', line 2

# Version string of the library.
#
# @return [String] the hard-coded version, currently '0.6.0'
def self.version
  '0.6.0'
end

Instance Method Details

#append_newline(activate = nil) ⇒ Object



97
98
99
100
101
102
# File 'lib/text_extractor.rb', line 97

# Combined reader/writer for the append-newline option.
#
# With a non-nil argument the option is stored in @append_newline and
# returned. With no argument (or nil) the stored option is returned, falling
# back to TextExtractor.append_newline while nothing has been stored yet.
#
# @param activate [Object, nil] new setting, or nil to read the current one
# @return [Object] the stored setting or the class-level fallback
def append_newline(activate = nil)
  if activate.nil?
    @append_newline.nil? ? TextExtractor.append_newline : @append_newline
  else
    @append_newline = activate
  end
end

#boolean(id, re = Patterns::BOOLEAN) ⇒ Object



73
74
75
# File 'lib/text_extractor.rb', line 73

# Declares a value named +id+ through #value. The converter block returns true
# unless its argument matches Patterns::FALSE, in which case it returns false.
#
# @param id the value identifier passed on to #value
# @param re the pattern passed on to #value (defaults to Patterns::BOOLEAN)
def boolean(id, re = Patterns::BOOLEAN)
  value(id, re) do |text|
    text.match(Patterns::FALSE).nil?
  end
end

#factory(object = nil) ⇒ Object



129
130
131
132
133
134
135
# File 'lib/text_extractor.rb', line 129

# Combined reader/writer for @factory.
#
# A truthy argument is stored in @factory and returned; with no argument (or
# a nil/false one) the currently stored factory is returned unchanged.
#
# @param object [Object, nil] the factory to store, or nil to read
# @return [Object, nil] the stored factory
def factory(object = nil)
  return @factory unless object

  @factory = object
end

#filldown(**kwargs, &block) ⇒ Object



137
138
139
140
141
# File 'lib/text_extractor.rb', line 137

# Registers a record of class Filldown by delegating to #record with the
# given keyword arguments and block.
#
# @raise [RuntimeError] when no block is given
# @return [Object] whatever #record returns
def filldown(**kwargs, &block)
  raise "#{self.class}.filldown requires a block" if block.nil?

  record(Filldown, **kwargs, &block)
end

#find_record_for(match) ⇒ Object



143
144
145
# File 'lib/text_extractor.rb', line 143

# Returns the record whose marker group is set in +match+.
#
# Record number i is looked up under the key "__i" (the group name that
# #regexps wraps around each record); the first index with a truthy entry
# selects the record. When no entry is truthy the index is nil and the
# lookup in #records raises TypeError.
#
# @param match an object indexable by "__<n>" strings (e.g. a MatchData)
# @return [Object] the matching element of #records
def find_record_for(match)
  slot = (0...records.length).find { |i| match["__#{i}"] }
  records[slot]
end

#float(id, re = Patterns::FLOAT) ⇒ Object



81
82
83
# File 'lib/text_extractor.rb', line 81

# Declares a value named +id+ through #value with a converter block that
# passes its argument to Kernel#Float.
#
# @param id the value identifier passed on to #value
# @param re the pattern passed on to #value (defaults to Patterns::FLOAT)
def float(id, re = Patterns::FLOAT)
  value(id, re) do |text|
    Float(text)
  end
end

#guard(**kwargs, &block) ⇒ Object



147
148
149
150
151
# File 'lib/text_extractor.rb', line 147

# Registers a record of class Guard by delegating to #record with the given
# keyword arguments and block.
#
# @raise [RuntimeError] when no block is given
# @return [Object] whatever #record returns
def guard(**kwargs, &block)
  raise "#{self.class}.guard requires a block" if block.nil?

  record(Guard, **kwargs, &block)
end

#guards(*guard_args) ⇒ Object



153
154
155
156
# File 'lib/text_extractor.rb', line 153

# Stores the guard definitions to append later in @append_guards (read by
# #initialize after the definition block has run). Called without arguments
# it stores Guard::DEFAULT instead.
#
# @param guard_args the guard definitions to store
# @return [Object] the stored list
def guards(*guard_args)
  @append_guards = guard_args.empty? ? Guard::DEFAULT : guard_args
end

#initialize_collections ⇒ Object



36
37
38
39
40
41
42
43
44
# File 'lib/text_extractor.rb', line 36

# Resets every collection the extractor accumulates into: the @values and
# @fill hashes and the @records, @filldowns, @current_record_values and
# @append_guards arrays. Each one gets its own fresh, empty object.
#
# @return [Array] the new (empty) @append_guards array
def initialize_collections
  @values = {}
  @fill = {}
  @records = []
  @filldowns = []
  @current_record_values = []
  @append_guards = []
end

#initialize_options ⇒ Object



28
29
30
31
32
33
34
# File 'lib/text_extractor.rb', line 28

# Resets every option instance variable (@factory, @section_delimiter,
# @section_terminator, @strip and @append_newline) to nil.
#
# @return [nil]
def initialize_options
  @factory = @section_delimiter = @section_terminator = @strip = @append_newline = nil
end

#inline(id, &block) ⇒ Object



69
70
71
# File 'lib/text_extractor.rb', line 69

# Creates an InlineValue for +id+ from the given block and stores it in
# @values under +id+.
#
# @param id the key under which the inline value is stored
# @return [InlineValue] the newly created inline value
def inline(id, &block)
  inline_value = InlineValue.new(id, &block)
  @values[id] = inline_value
end

#integer(id, re = Patterns::INTEGER) ⇒ Object



77
78
79
# File 'lib/text_extractor.rb', line 77

# Declares a value named +id+ through #value with a converter block that
# passes its argument to Kernel#Integer.
#
# @param id the value identifier passed on to #value
# @param re the pattern passed on to #value (defaults to Patterns::INTEGER)
def integer(id, re = Patterns::INTEGER)
  value(id, re) do |text|
    Integer(text)
  end
end

#ipaddr(id, re = Patterns::IPADDR) ⇒ Object



89
90
91
# File 'lib/text_extractor.rb', line 89

# Declares a value named +id+ through #value with a converter block that
# builds an IPAddr from its argument.
#
# @param id the value identifier passed on to #value
# @param re the pattern passed on to #value (defaults to Patterns::IPADDR)
def ipaddr(id, re = Patterns::IPADDR)
  value(id, re) do |text|
    IPAddr.new(text)
  end
end

#ipnetaddr(id, re = Patterns::IPNETADDR) ⇒ Object



93
94
95
# File 'lib/text_extractor.rb', line 93

# Declares a value named +id+ through #value with a converter block that
# builds an IPAddr from its argument. Identical to #ipaddr apart from the
# default pattern, Patterns::IPNETADDR.
#
# @param id the value identifier passed on to #value
# @param re the pattern passed on to #value (defaults to Patterns::IPNETADDR)
def ipnetaddr(id, re = Patterns::IPNETADDR)
  value(id, re) do |text|
    IPAddr.new(text)
  end
end

#rational(id, re = Patterns::RATIONAL) ⇒ Object



85
86
87
# File 'lib/text_extractor.rb', line 85

# Declares a value named +id+ through #value with a converter block that
# passes its argument to Kernel#Rational.
#
# @param id the value identifier passed on to #value
# @param re the pattern passed on to #value (defaults to Patterns::RATIONAL)
def rational(id, re = Patterns::RATIONAL)
  value(id, re) do |text|
    Rational(text)
  end
end

#record(klass = Record, **kwargs, &block) ⇒ Object



104
105
106
107
108
109
110
111
# File 'lib/text_extractor.rb', line 104

# Defines a record and appends it to @records.
#
# A fresh array is installed as @current_record_values *before* the block is
# evaluated, so anything the block pushes onto @current_record_values (as the
# accessors created by #value do) lands in the same array that is handed to
# the record as :values. The block is evaluated with instance_exec and its
# result becomes the first argument of klass.new.
#
# Keyword arguments passed on to klass.new:
# * :extractor_values - always set to #values
# * :factory          - set to @factory when @factory is set and the caller
#                       did not supply a truthy :factory
# * :values           - always set to the fresh collector array
#
# @param klass the class to instantiate (defaults to Record)
# @raise [RuntimeError] when no block is given
# @return [Object] @records, after the new record has been appended
def record(klass = Record, **kwargs, &block)
  raise "#{self.class}.record requires a block" if block.nil?

  collected = []
  @current_record_values = collected
  kwargs[:extractor_values] = values
  kwargs[:factory] ||= @factory if @factory
  kwargs[:values] = collected
  pattern = instance_exec(&block)
  @records << klass.new(pattern, **kwargs)
end

#regexps ⇒ Object



182
183
184
185
186
# File 'lib/text_extractor.rb', line 182

# Builds one Regexp per record, in record order. Each record's source is
# wrapped in a named group "__<index>" and compiled with that record's
# options; #find_record_for uses those group names to tell which record
# matched.
#
# @return [Array<Regexp>]
def regexps
  @records.each_with_index.map do |rec, index|
    Regexp.new("(?<__#{index}>#{rec.source})", rec.options)
  end
end

#scan(input) ⇒ Object



158
159
160
161
162
163
164
165
# File 'lib/text_extractor.rb', line 158

# Runs the extractor over +input+ and returns the collected matches.
#
# The input is first passed through @strip when one is configured, then gets
# a trailing "\n" when #append_newline is truthy and the text does not
# already end with one. The text is split with #sections; one Extraction is
# run per section and the results of #extraction_matches are concatenated.
# The same fill hash is shared by every section's Extraction.
#
# @param input [String] the text to scan (the caller's string is not mutated)
# @return [Array] the flattened extraction matches of all sections
def scan(input)
  text = @strip ? @strip.call(input) : input
  text = text + "\n" if append_newline && !text.end_with?("\n")
  fill = {}
  sections(text).flat_map do |chunk|
    Extraction.new(chunk, self, fill).scan.extraction_matches
  end
end

#section(delimiter, terminator = nil) ⇒ Object



113
114
115
116
# File 'lib/text_extractor.rb', line 113

# Configures how #sections splits the input: +delimiter+ is stored in
# @section_delimiter and +terminator+ in @section_terminator.
#
# @param delimiter the separator later passed to String#split
# @param terminator [Object, nil] text appended to every section, if any
# @return [Object, nil] the terminator that was stored
def section(delimiter, terminator = nil)
  @section_terminator = terminator
  @section_delimiter = delimiter
  terminator
end

#sections(input) ⇒ Object



167
168
169
170
171
172
173
174
# File 'lib/text_extractor.rb', line 167

# Splits +input+ into the sections that are scanned independently.
#
# Without a configured @section_delimiter the whole input is the only
# section. Otherwise the input is split on the delimiter, and when a
# @section_terminator is configured it is appended to every piece.
#
# @param input [String]
# @return [Array<String>]
def sections(input)
  return [input] unless @section_delimiter

  pieces = input.split(@section_delimiter)
  @section_terminator ? pieces.map { |piece| piece + @section_terminator } : pieces
end

#skip(**kwargs, &block) ⇒ Object



176
177
178
179
180
# File 'lib/text_extractor.rb', line 176

# Registers a record of class Skip by delegating to #record with the given
# keyword arguments and block.
#
# @raise [RuntimeError] when no block is given
# @return [Object] whatever #record returns
def skip(**kwargs, &block)
  raise "#{self.class}.skip requires a block" if block.nil?

  record(Skip, **kwargs, &block)
end

#strip(side = nil) ⇒ Object



124
125
126
127
# File 'lib/text_extractor.rb', line 124

# Selects the whitespace-stripping proc that #scan applies to its input.
#
# +side+ is looked up in STRIP_PROCS and the result is stored in @strip. Any
# key without an entry, including the default nil, raises and leaves @strip
# untouched.
#
# @param side [Symbol, nil] a key of STRIP_PROCS
# @raise [ArgumentError] when +side+ has no entry in STRIP_PROCS
# @return [Object] the stored proc
def strip(side = nil)
  chosen = STRIP_PROCS[side]
  raise ArgumentError, 'Unknown strip option' unless chosen

  @strip = chosen
end

#to_re ⇒ Object



188
189
190
# File 'lib/text_extractor.rb', line 188

# Combines the per-record patterns from #regexps into a single Regexp that
# matches any of them (Regexp.union).
#
# @return [Regexp]
def to_re
  patterns = regexps
  Regexp.union(patterns)
end

#value(id, re, &block) ⇒ Object



61
62
63
64
65
66
67
# File 'lib/text_extractor.rb', line 61

# Declares a named value.
#
# A Value built from +id+, +re+ and the block is stored in @values under
# +id+, and a singleton method named +id+ is defined on this extractor.
# Calling that method appends the Value to @current_record_values and
# returns the regexp source fragment "(?<id>...)" wrapping re.source in a
# named group.
#
# @param id [Symbol] name of the value and of the generated method
# @param re an object responding to #source (e.g. a Regexp)
# @return [Symbol] the name of the generated method
def value(id, re, &block)
  definition = Value.new(id, re, &block)
  @values[id] = definition
  define_singleton_method(id) do
    @current_record_values << definition
    "(?<#{id}>#{re.source})"
  end
end