Class: TextExtractor
- Inherits:
-
Object
show all
- Defined in:
- lib/text_extractor.rb,
lib/text_extractor/skip.rb,
lib/text_extractor/guard.rb,
lib/text_extractor/value.rb,
lib/text_extractor/record.rb,
lib/text_extractor/version.rb,
lib/text_extractor/filldown.rb,
lib/text_extractor/directives.rb,
lib/text_extractor/extraction.rb,
lib/text_extractor/inline_value.rb,
lib/text_extractor/directives/group.rb,
lib/text_extractor/directives/classes.rb
Overview
Represents an extractor definition.
Defined Under Namespace
Modules: Patterns
Classes: Directives, Extraction, Filldown, Guard, GuardError, InlineValue, Record, Skip, State, Value
Constant Summary
collapse
- INDENTED =
{
description: 'indented line',
block: proc {
/
^[^\n\S]+[^\n]*$
/
}
}.freeze
- UNINDENTED =
{
description: 'unindented line',
block: proc {
/
^\S+[^\n]*$
/
}
}.freeze
- DEFAULT =
[
INDENTED,
UNINDENTED
].freeze
Instance Attribute Summary collapse
Class Method Summary
collapse
Instance Method Summary
collapse
-
#boolean(id, re = Patterns::BOOLEAN) ⇒ Object
-
#factory(object = nil) ⇒ Object
-
#filldown(**kwargs, &block) ⇒ Object
-
#find_record_for(match) ⇒ Object
-
#float(id, re = Patterns::FLOAT) ⇒ Object
-
#guard(**kwargs, &block) ⇒ Object
-
#guards(*guard_args) ⇒ Object
-
#initialize(&block) ⇒ TextExtractor
constructor
Builds an extractor by evaluating the given definition block in the context of the new instance; raises if no block is given.
-
#inline(id, &block) ⇒ Object
-
#integer(id, re = Patterns::INTEGER) ⇒ Object
-
#ipaddr(id, re = Patterns::IPADDR) ⇒ Object
-
#ipnetaddr(id, re = Patterns::IPNETADDR) ⇒ Object
-
#rational(id, re = Patterns::RATIONAL) ⇒ Object
-
#record(klass = Record, **kwargs, &block) ⇒ Object
-
#regexps ⇒ Object
-
#scan(input) ⇒ Object
-
#section(delimiter, terminator = nil) ⇒ Object
-
#sections(input) ⇒ Object
-
#skip(**kwargs, &block) ⇒ Object
-
#to_re ⇒ Object
-
#value(id, re, &block) ⇒ Object
Constructor Details
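Builds an extractor by evaluating the given definition block in the context of the new instance; raises if no block is given. (The original source carries a `rubocop: disable Metrics/MethodLength` pragma on this method.)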
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
# File 'lib/text_extractor.rb', line 14
# Builds an extractor by evaluating the given definition block against the
# new instance.
#
# @yield the extractor definition; it is run with +instance_exec+, so bare
#   method calls inside it resolve against this instance
# @raise [RuntimeError] if no block is given
def initialize(&block)
  raise "#{self.class}.new requires a block" unless block

  @values = {}
  @fill = {}
  @records = []
  @filldowns = []
  @current_record_values = []
  @section_delimiter = nil
  @section_terminator = nil
  # Initialised explicitly so reading @factory before it has been set never
  # touches an undefined instance variable.
  @factory = nil
  @append_guards = []
  instance_exec(&block)
  # Guard definitions collected in @append_guards are registered only after
  # the definition block has finished running.
  @append_guards.each { |g| guard(**g, &g[:block]) }
end
|
Instance Attribute Details
#records ⇒ Object
Returns the value of attribute records.
11
12
13
|
# File 'lib/text_extractor.rb', line 11
# Reader for the records registered on this extractor.
#
# @return [Object] the current value of +@records+
def records
  @records
end
|
#values ⇒ Object
Returns the value of attribute values.
11
12
13
|
# File 'lib/text_extractor.rb', line 11
# Reader for the value definitions registered on this extractor.
#
# @return [Object] the current value of +@values+
def values
  @values
end
|
Class Method Details
.expand_directives(re) ⇒ Object
7
8
9
|
# File 'lib/text_extractor/directives.rb', line 7
# Wraps +re+ in a Directives object and returns the result of expanding it.
#
# @param re [Object] passed unchanged to +Directives.new+
# @return [Object] whatever +Directives#expand+ returns
def self.expand_directives(re)
  directives = Directives.new(re)
  directives.expand
end
|
.version ⇒ Object
2
3
4
|
# File 'lib/text_extractor/version.rb', line 2
# Version string of the library.
#
# @return [String]
def self.version
  '0.5.0'
end
|
Instance Method Details
#boolean(id, re = Patterns::BOOLEAN) ⇒ Object
57
58
59
|
# File 'lib/text_extractor.rb', line 57
# Registers a value whose captured text converts to a boolean: +false+ when
# the text matches Patterns::FALSE, +true+ otherwise.
#
# @param id [Object] identifier forwarded to #value
# @param re [Object] pattern forwarded to #value
# @return [Object] whatever #value returns
def boolean(id, re = Patterns::BOOLEAN)
  value(id, re) { |text| !text.match?(Patterns::FALSE) }
end
|
#factory(object = nil) ⇒ Object
94
95
96
97
98
99
100
|
# File 'lib/text_extractor.rb', line 94
# Combined reader/writer for the extractor-level factory.
#
# @param object [Object, nil] when truthy, stored as the new factory
# @return [Object, nil] the factory in effect after the call
def factory(object = nil)
  @factory = object if object
  @factory
end
|
#filldown(**kwargs, &block) ⇒ Object
102
103
104
105
|
# File 'lib/text_extractor.rb', line 102
# Registers a Filldown record built from the given block.
#
# @param kwargs [Hash] options forwarded to #record
# @yield forwarded to #record
# @return [Object] whatever #record returns
# @raise [RuntimeError] if no block is given
def filldown(**kwargs, &block)
  raise "#{self.class}.filldown requires a block" if block.nil?

  record(Filldown, **kwargs, &block)
end
|
#find_record_for(match) ⇒ Object
107
108
109
|
# File 'lib/text_extractor.rb', line 107
# Looks up the record whose tagging group (+__0+, +__1+, ...) is set in
# +match+.
#
# @param match [#[]] match result, indexed by group name
# @return [Object] the entry of #records at the first index whose group is
#   truthy in +match+
def find_record_for(match)
  index = records.each_index.find { |i| match["__#{i}"] }
  records[index]
end
|
#float(id, re = Patterns::FLOAT) ⇒ Object
65
66
67
|
# File 'lib/text_extractor.rb', line 65
# Registers a value whose captured text is converted with +Float()+.
#
# @param id [Object] identifier forwarded to #value
# @param re [Object] pattern forwarded to #value
# @return [Object] whatever #value returns
def float(id, re = Patterns::FLOAT)
  value(id, re) { |text| Float(text) }
end
|
#guard(**kwargs, &block) ⇒ Object
111
112
113
114
|
# File 'lib/text_extractor.rb', line 111
# Registers a Guard record built from the given block.
#
# @param kwargs [Hash] options forwarded to #record
# @yield forwarded to #record
# @return [Object] whatever #record returns
# @raise [RuntimeError] if no block is given
def guard(**kwargs, &block)
  raise "#{self.class}.guard requires a block" if block.nil?

  record(Guard, **kwargs, &block)
end
|
#guards(*guard_args) ⇒ Object
116
117
118
119
|
# File 'lib/text_extractor.rb', line 116
# Stores guard definitions in +@append_guards+. With no arguments,
# Guards::DEFAULT is stored instead.
#
# @param guard_args [Array] guard definitions
# @return [Object] the stored collection
def guards(*guard_args)
  @append_guards = guard_args.empty? ? Guards::DEFAULT : guard_args
end
|
#inline(id, &block) ⇒ Object
53
54
55
|
# File 'lib/text_extractor.rb', line 53
# Registers an InlineValue under +id+ in +@values+.
#
# @param id [Object] key for the value
# @yield forwarded to +InlineValue.new+
# @return [Object] the newly created InlineValue
def inline(id, &block)
  inline_value = InlineValue.new(id, &block)
  @values[id] = inline_value
end
|
#integer(id, re = Patterns::INTEGER) ⇒ Object
61
62
63
|
# File 'lib/text_extractor.rb', line 61
# Registers a value whose captured text is converted with +Integer()+.
#
# @param id [Object] identifier forwarded to #value
# @param re [Object] pattern forwarded to #value
# @return [Object] whatever #value returns
def integer(id, re = Patterns::INTEGER)
  value(id, re) { |text| Integer(text) }
end
|
#ipaddr(id, re = Patterns::IPADDR) ⇒ Object
73
74
75
|
# File 'lib/text_extractor.rb', line 73
# Registers a value whose captured text is converted with +IPAddr.new+.
#
# @param id [Object] identifier forwarded to #value
# @param re [Object] pattern forwarded to #value
# @return [Object] whatever #value returns
def ipaddr(id, re = Patterns::IPADDR)
  value(id, re) { |text| IPAddr.new(text) }
end
|
#ipnetaddr(id, re = Patterns::IPNETADDR) ⇒ Object
77
78
79
|
# File 'lib/text_extractor.rb', line 77
# Registers a value whose captured text is converted with +IPAddr.new+,
# using Patterns::IPNETADDR as the default pattern.
#
# @param id [Object] identifier forwarded to #value
# @param re [Object] pattern forwarded to #value
# @return [Object] whatever #value returns
def ipnetaddr(id, re = Patterns::IPNETADDR)
  value(id, re) { |text| IPAddr.new(text) }
end
|
#rational(id, re = Patterns::RATIONAL) ⇒ Object
69
70
71
|
# File 'lib/text_extractor.rb', line 69
# Registers a value whose captured text is converted with +Rational()+.
#
# @param id [Object] identifier forwarded to #value
# @param re [Object] pattern forwarded to #value
# @return [Object] whatever #value returns
def rational(id, re = Patterns::RATIONAL)
  value(id, re) { |text| Rational(text) }
end
|
#record(klass = Record, **kwargs, &block) ⇒ Object
81
82
83
84
85
86
87
|
# File 'lib/text_extractor.rb', line 81
# Builds a record of type +klass+ from the given block and appends it to
# +@records+.
#
# @param klass [Class] record class to instantiate
# @param kwargs [Hash] options passed to +klass.new+; +:extractor_values+
#   and +:values+ are always overwritten, +:factory+ falls back to the
#   extractor-level factory when one is set
# @yield evaluated with +instance_exec+; its result is the first argument
#   to +klass.new+
# @return [Array] +@records+ after the new record has been appended
# @raise [RuntimeError] if no block is given
def record(klass = Record, **kwargs, &block)
  raise "#{self.class}.record requires a block" if block.nil?

  kwargs[:extractor_values] = values
  kwargs[:factory] ||= @factory if @factory
  # Start a fresh collector before the block runs: the same array is handed
  # to the record, so anything appended during the block ends up in it.
  @current_record_values = []
  kwargs[:values] = @current_record_values
  pattern = instance_exec(&block)
  @records << klass.new(pattern, **kwargs)
end
|
#regexps ⇒ Object
142
143
144
145
146
|
# File 'lib/text_extractor.rb', line 142
# Builds one Regexp per record, wrapping each record's source in a named
# group +__<index>+ and keeping the record's options.
#
# @return [Array<Regexp>]
def regexps
  @records.each_with_index.map do |rec, index|
    Regexp.new("(?<__#{index}>#{rec.source})", rec.options)
  end
end
|
#scan(input) ⇒ Object
121
122
123
124
125
126
|
# File 'lib/text_extractor.rb', line 121
# Scans +input+ section by section (see #sections) and flattens the
# per-section results into a single array.
#
# NOTE(review): this listing looks damaged — the receiver of `.new` and the
# call that follows `.scan.` are missing. Presumably the receiver is
# Extraction (it is listed in this class's namespace); confirm against
# lib/text_extractor.rb before relying on this snippet.
def scan(input)
# The same hash instance is handed to every `.new` call below.
prefill = {}
sections(input).flat_map { |section|
.new(section, self, prefill).scan.
}
end
|
#section(delimiter, terminator = nil) ⇒ Object
89
90
91
92
|
# File 'lib/text_extractor.rb', line 89
# Configures how #sections splits its input.
#
# @param delimiter [Object] stored in +@section_delimiter+ and later
#   passed to +String#split+
# @param terminator [Object, nil] stored in +@section_terminator+; when set,
#   it is appended to every section
# @return [Object, nil] +terminator+
def section(delimiter, terminator = nil)
  @section_delimiter = delimiter
  @section_terminator = terminator
end
|
#sections(input) ⇒ Object
128
129
130
131
132
133
134
135
|
# File 'lib/text_extractor.rb', line 128
# Splits +input+ into sections according to the configured delimiter and
# terminator.
#
# @param input [String] text to split
# @return [Array<String>] +[input]+ when no delimiter is configured;
#   otherwise the pieces produced by +split+, each with the terminator
#   appended when one is configured
def sections(input)
  return [input] unless @section_delimiter

  chunks = input.split(@section_delimiter)
  @section_terminator ? chunks.map { |chunk| chunk + @section_terminator } : chunks
end
|
#skip(**kwargs, &block) ⇒ Object
137
138
139
140
|
# File 'lib/text_extractor.rb', line 137
# Registers a Skip record built from the given block.
#
# @param kwargs [Hash] options forwarded to #record
# @yield forwarded to #record
# @return [Object] whatever #record returns
# @raise [RuntimeError] if no block is given
def skip(**kwargs, &block)
  raise "#{self.class}.skip requires a block" if block.nil?

  record(Skip, **kwargs, &block)
end
|
#to_re ⇒ Object
148
149
150
|
# File 'lib/text_extractor.rb', line 148
# Combines the per-record patterns from #regexps into a single alternation.
#
# @return [Regexp]
def to_re
  Regexp.union(regexps)
end
|
#value(id, re, &block) ⇒ Object
45
46
47
48
49
50
51
|
# File 'lib/text_extractor.rb', line 45
# Registers a Value under +id+ and defines a singleton method named +id+
# on this extractor. Calling that method appends the value to
# +@current_record_values+ and returns a named-group pattern fragment.
#
# @param id [Symbol, String] name of the value and of the generated method
# @param re [#source] pattern whose source is embedded in the named group
# @yield forwarded to +Value.new+
# @return [Symbol] the name of the generated method
def value(id, re, &block)
  definition = Value.new(id, re, &block)
  @values[id] = definition
  define_singleton_method(id) do
    @current_record_values << definition
    "(?<#{id}>#{re.source})"
  end
end
|