Class: TextExtractor
- Inherits:
-
Object
show all
- Defined in:
- lib/text_extractor.rb,
lib/text_extractor/value.rb,
lib/text_extractor/record.rb,
lib/text_extractor/version.rb,
lib/text_extractor/filldown.rb,
lib/text_extractor/directives.rb,
lib/text_extractor/extraction.rb,
lib/text_extractor/inline_value.rb,
lib/text_extractor/directives/group.rb,
lib/text_extractor/directives/classes.rb
Overview
Represents an extractor definition: the values, records and section settings declared in the block passed to TextExtractor.new.
Defined Under Namespace
Modules: Patterns
Classes: Directives, Extraction, Filldown, InlineValue, Record, State, Value
Instance Attribute Summary collapse
Class Method Summary
collapse
Instance Method Summary
collapse
-
#boolean(id, re = Patterns::BOOLEAN) ⇒ Object
-
#filldown(**kwargs, &block) ⇒ Object
-
#find_record_for(match) ⇒ Object
-
#float(id, re = Patterns::FLOAT) ⇒ Object
-
#initialize(&block) ⇒ TextExtractor
constructor
A new instance of TextExtractor.
-
#inline(id, &block) ⇒ Object
-
#integer(id, re = Patterns::INTEGER) ⇒ Object
-
#ipaddr(id, re = Patterns::IPADDR) ⇒ Object
-
#ipnetaddr(id, re = Patterns::IPNETADDR) ⇒ Object
-
#rational(id, re = Patterns::RATIONAL) ⇒ Object
-
#record(klass = Record, **kwargs, &block) ⇒ Object
-
#regexps ⇒ Object
-
#scan(input) ⇒ Object
-
#section(delimiter, terminator = nil) ⇒ Object
-
#sections(input) ⇒ Object
-
#to_re ⇒ Object
-
#value(id, re, &block) ⇒ Object
Constructor Details
Returns a new instance of TextExtractor.
11
12
13
14
15
16
17
18
19
20
21
22
|
# File 'lib/text_extractor.rb', line 11
# Builds an extractor by evaluating the given definition block against the
# new instance.
#
# All instance state is set up before the block runs, so the block can call
# the DSL methods of this class, which read and write that state.
#
# @yield the extractor definition; evaluated with +instance_exec+, so +self+
#   inside the block is the new extractor
# @raise [RuntimeError] if no block is given
def initialize(&block)
  raise "#{self.class}.new requires a block" unless block

  @values = {}                 # id => value definition (filled by #value / #inline)
  @fill = {}
  @records = []                # appended to by #record
  @filldowns = []
  @current_record_values = []  # replaced with a fresh array by each #record call
  @section_delimiter = nil     # set by #section
  @section_terminator = nil    # set by #section
  instance_exec(&block)
end
|
Instance Attribute Details
#records ⇒ Object
Returns the value of attribute records.
9
10
11
|
# File 'lib/text_extractor.rb', line 9
# Reader for the record definitions held by this extractor.
#
# @return [Array] the object stored in +@records+ (initialised to an empty
#   array by the constructor and appended to by #record)
def records
@records
end
|
#values ⇒ Object
Returns the value of attribute values.
9
10
11
|
# File 'lib/text_extractor.rb', line 9
# Reader for the value definitions held by this extractor.
#
# @return [Hash] the object stored in +@values+ (initialised to an empty
#   hash by the constructor; #value and #inline store entries keyed by id)
def values
@values
end
|
Class Method Details
.expand_directives(re) ⇒ Object
7
8
9
|
# File 'lib/text_extractor/directives.rb', line 7
# Expands the directives found in +re+.
#
# @param re [Object] handed unchanged to +Directives.new+
# @return [Object] whatever +Directives#expand+ returns
def self.expand_directives(re)
  directives = Directives.new(re)
  directives.expand
end
|
.version ⇒ Object
2
3
4
|
# File 'lib/text_extractor/version.rb', line 2
# Version of the TextExtractor library.
#
# @return [String] the version string, '0.4.0'
def self.version
'0.4.0'
end
|
Instance Method Details
#boolean(id, re = Patterns::BOOLEAN) ⇒ Object
51
52
53
|
# File 'lib/text_extractor.rb', line 51
# Declares a boolean value named +id+.
#
# The conversion block given to #value yields +false+ when the text matches
# +Patterns::FALSE+ and +true+ otherwise.
#
# @param id [Symbol] name of the value
# @param re [Regexp] pattern for the value (defaults to +Patterns::BOOLEAN+)
# @return [Object] the result of #value
def boolean(id, re = Patterns::BOOLEAN)
  value(id, re) do |text|
    false_match = text.match(Patterns::FALSE)
    !false_match
  end
end
|
#filldown(**kwargs, &block) ⇒ Object
87
88
89
90
|
# File 'lib/text_extractor.rb', line 87
# Declares a filldown: a record built with the +Filldown+ class.
#
# @param kwargs [Hash] options forwarded to #record
# @yield the record definition block, forwarded to #record
# @return [Object] the result of #record
# @raise [RuntimeError] if no block is given
def filldown(**kwargs, &block)
  raise "#{self.class}.filldown requires a block" if block.nil?

  record(Filldown, **kwargs, &block)
end
|
#find_record_for(match) ⇒ Object
92
93
94
|
# File 'lib/text_extractor.rb', line 92
# Finds the record whose marker group took part in +match+.
#
# Record number +i+ is wrapped in a named group +__i+ by #regexps; the first
# index whose group is set in +match+ selects the record.
#
# @param match [#[]] match data (or any object indexable by group name)
# @return [Object, nil] the matching entry of #records, or +nil+ when no
#   marker group is set (previously this raised a TypeError from indexing
#   with +nil+)
def find_record_for(match)
  index = records.each_index.find { |i| match["__#{i}"] }
  # Index 0 is truthy in Ruby, so `&&` only short-circuits on "not found".
  index && records[index]
end
|
#float(id, re = Patterns::FLOAT) ⇒ Object
59
60
61
|
# File 'lib/text_extractor.rb', line 59
# Declares a value named +id+ whose conversion block calls +Float()+.
#
# @param id [Symbol] name of the value
# @param re [Regexp] pattern for the value (defaults to +Patterns::FLOAT+)
# @return [Object] the result of #value
def float(id, re = Patterns::FLOAT)
  value(id, re) do |text|
    Float(text)
  end
end
|
#inline(id, &block) ⇒ Object
47
48
49
|
# File 'lib/text_extractor.rb', line 47
# Declares an inline value named +id+ and stores it in +@values+.
#
# @param id [Symbol] name of the value, used as the key in +@values+
# @yield forwarded to +InlineValue.new+
# @return [InlineValue] the newly created definition
def inline(id, &block)
  definition = InlineValue.new(id, &block)
  @values.store(id, definition)
end
|
#integer(id, re = Patterns::INTEGER) ⇒ Object
55
56
57
|
# File 'lib/text_extractor.rb', line 55
# Declares a value named +id+ whose conversion block calls +Integer()+.
#
# @param id [Symbol] name of the value
# @param re [Regexp] pattern for the value (defaults to +Patterns::INTEGER+)
# @return [Object] the result of #value
def integer(id, re = Patterns::INTEGER)
  value(id, re) do |text|
    Integer(text)
  end
end
|
#ipaddr(id, re = Patterns::IPADDR) ⇒ Object
67
68
69
|
# File 'lib/text_extractor.rb', line 67
# Declares a value named +id+ whose conversion block builds an +IPAddr+.
#
# @param id [Symbol] name of the value
# @param re [Regexp] pattern for the value (defaults to +Patterns::IPADDR+)
# @return [Object] the result of #value
def ipaddr(id, re = Patterns::IPADDR)
  value(id, re) do |text|
    IPAddr.new(text)
  end
end
|
#ipnetaddr(id, re = Patterns::IPNETADDR) ⇒ Object
71
72
73
|
# File 'lib/text_extractor.rb', line 71
# Declares a value named +id+ whose conversion block builds an +IPAddr+;
# it differs from #ipaddr only in its default pattern.
#
# @param id [Symbol] name of the value
# @param re [Regexp] pattern for the value (defaults to +Patterns::IPNETADDR+)
# @return [Object] the result of #value
def ipnetaddr(id, re = Patterns::IPNETADDR)
  value(id, re) do |text|
    IPAddr.new(text)
  end
end
|
#rational(id, re = Patterns::RATIONAL) ⇒ Object
63
64
65
|
# File 'lib/text_extractor.rb', line 63
# Declares a value named +id+ whose conversion block calls +Rational()+.
#
# @param id [Symbol] name of the value
# @param re [Regexp] pattern for the value (defaults to +Patterns::RATIONAL+)
# @return [Object] the result of #value
def rational(id, re = Patterns::RATIONAL)
  value(id, re) do |text|
    Rational(text)
  end
end
|
#record(klass = Record, **kwargs, &block) ⇒ Object
75
76
77
78
79
80
|
# File 'lib/text_extractor.rb', line 75
# Declares a record and appends it to +@records+.
#
# A fresh array is installed as +@current_record_values+ before the block is
# evaluated, so value methods called inside the block (see #value) collect
# into it. The same array is passed to +klass.new+ as +values:+.
#
# @param klass [Class] class to instantiate (defaults to +Record+)
# @param kwargs [Hash] extra options for +klass.new+; +:extractor_values+
#   and +:values+ are always overwritten here
# @yield the record definition, evaluated with +instance_exec+; its result is
#   the first argument to +klass.new+
# @return [Array] +@records+ after the new record has been appended
# @raise [RuntimeError] if no block is given
def record(klass = Record, **kwargs, &block)
  raise "#{self.class}.record requires a block" if block.nil?

  shared = values
  collected = @current_record_values = []
  options = kwargs.merge(extractor_values: shared, values: collected)
  @records << klass.new(instance_exec(&block), **options)
end
|
#regexps ⇒ Object
112
113
114
115
116
|
# File 'lib/text_extractor.rb', line 112
# Builds one regexp per record, each wrapped in a marker group.
#
# Record number +i+ becomes <tt>(?<__i>source)</tt> and keeps the record's
# own options; #find_record_for looks these marker groups up by name.
#
# @return [Array<Regexp>] the wrapped regexps, in record order
def regexps
  @records.each_with_index.map do |rec, idx|
    wrapped = "(?<__#{idx}>#{rec.source})"
    Regexp.new(wrapped, rec.options)
  end
end
|
#scan(input) ⇒ Object
96
97
98
99
100
101
|
# File 'lib/text_extractor.rb', line 96
# Scans +input+ one section at a time (see #sections) and flattens the
# per-section results into a single array via +flat_map+.
#
# NOTE(review): this listing is damaged and is not valid Ruby as shown: the
# receiver of `.new(section, self, prefill)` and the method name after the
# trailing `.scan.` are missing. The receiver is presumably
# TextExtractor::Extraction (lib/text_extractor/extraction.rb) — TODO confirm
# both against lib/text_extractor.rb line 96 before relying on this method.
#
# @param input [Object] text to scan; passed as-is to #sections
# @return [Array] the flattened per-section results
def scan(input)
# A single hash instance is shared by the `.new(...)` call of every section.
prefill = {}
sections(input).flat_map { |section|
.new(section, self, prefill).scan.
}
end
|
#section(delimiter, terminator = nil) ⇒ Object
82
83
84
85
|
# File 'lib/text_extractor.rb', line 82
# Configures how #sections splits the input.
#
# @param delimiter [Object] separator later passed to +String#split+
# @param terminator [Object, nil] text appended to every section after
#   splitting; +nil+ appends nothing
# @return [Object, nil] +terminator+
def section(delimiter, terminator = nil)
  @section_delimiter, @section_terminator = delimiter, terminator
  terminator
end
|
#sections(input) ⇒ Object
103
104
105
106
107
108
109
110
|
# File 'lib/text_extractor.rb', line 103
# Splits +input+ into sections according to the #section settings.
#
# Without a delimiter the whole input is the only section. With a delimiter
# the input is split on it, and the terminator (when set) is appended to
# every piece.
#
# @param input [String] text to split
# @return [Array<String>] the sections
def sections(input)
  if @section_delimiter
    chunks = input.split(@section_delimiter)
    if @section_terminator
      chunks.map { |chunk| chunk + @section_terminator }
    else
      chunks
    end
  else
    [input]
  end
end
|
#to_re ⇒ Object
118
119
120
|
# File 'lib/text_extractor.rb', line 118
# Combines the per-record regexps from #regexps into one alternation.
#
# @return [Regexp] the union of all record regexps
def to_re
  alternatives = regexps
  Regexp.union(alternatives)
end
|
#value(id, re, &block) ⇒ Object
39
40
41
42
43
44
45
|
# File 'lib/text_extractor.rb', line 39
# Declares a value named +id+ and defines a method of that name on this
# extractor.
#
# The definition is stored in +@values+ under +id+. Each call to the
# generated method appends the definition to +@current_record_values+ and
# returns the source of a named capture group wrapping +re+.
#
# @param id [Symbol] name of the value, of the capture group and of the
#   generated singleton method
# @param re [Regexp] pattern for the value; only its +source+ is embedded
# @yield forwarded to +Value.new+
# @return [Symbol] the name of the generated method
def value(id, re, &block)
  definition = Value.new(id, re, &block)
  @values[id] = definition
  define_singleton_method(id) do
    @current_record_values.push(definition)
    "(?<#{id}>#{re.source})"
  end
end
|