Class: TextExtractor
- Inherits:
-
Object
show all
- Defined in:
- lib/text_extractor.rb,
lib/text_extractor/value.rb,
lib/text_extractor/record.rb,
lib/text_extractor/version.rb,
lib/text_extractor/filldown.rb,
lib/text_extractor/extraction.rb
Overview
Represents an extractor definition.
Defined Under Namespace
Modules: Patterns
Classes: Extraction, Filldown, Record, Value
Instance Attribute Summary collapse
Class Method Summary
collapse
Instance Method Summary
collapse
-
#boolean(id, re = Patterns::BOOLEAN) ⇒ Object
-
#filldown(**kwargs, &block) ⇒ Object
-
#find_record_for(match) ⇒ Object
-
#float(id, re = Patterns::FLOAT) ⇒ Object
-
#initialize(&block) ⇒ TextExtractor
constructor
A new instance of TextExtractor.
-
#integer(id, re = Patterns::INTEGER) ⇒ Object
-
#ipaddr(id, re = Patterns::IPADDR) ⇒ Object
-
#ipnetaddr(id, re = Patterns::IPNETADDR) ⇒ Object
-
#rational(id, re = Patterns::RATIONAL) ⇒ Object
-
#record(klass = Record, **kwargs, &block) ⇒ Object
-
#regexps ⇒ Object
-
#scan(input) ⇒ Object
-
#strip_record(regexp) ⇒ Object
-
#to_re ⇒ Object
-
#value(id, re, &block) ⇒ Object
Constructor Details
Returns a new instance of TextExtractor.
10
11
12
13
14
15
16
17
18
19
|
# File 'lib/text_extractor.rb', line 10
# Builds an extractor definition by evaluating the given block against the
# new instance.
#
# All bookkeeping state is reset to empty containers *before* the block
# runs, because the block is executed with +instance_exec+ and is therefore
# free to read or append to these instance variables.
#
# @yield the definition body, evaluated with +self+ set to the new instance
# @raise [RuntimeError] when no block is supplied
def initialize(&block)
  raise "#{self.class}.new requires a block" unless block

  @values = {}
  # NOTE(review): @fill and @filldowns are not read by any method visible
  # in this file; kept in case other files in the project depend on them.
  @fill = {}
  @records = []
  @filldowns = []
  @current_record_values = []
  instance_exec(&block)
end
|
Instance Attribute Details
#records ⇒ Object
Returns the value of attribute records.
8
9
10
|
# File 'lib/text_extractor.rb', line 8
# Reader for the @records instance variable.
#
# @return [Object] the current value of @records
def records
@records
end
|
#values ⇒ Object
Returns the value of attribute values.
8
9
10
|
# File 'lib/text_extractor.rb', line 8
# Reader for the @values instance variable.
#
# @return [Object] the current value of @values
def values
@values
end
|
Class Method Details
.version ⇒ Object
2
3
4
|
# File 'lib/text_extractor/version.rb', line 2
# Reports the library version.
#
# @return [String] the version string
def self.version
"0.0.2"
end
|
Instance Method Details
#boolean(id, re = Patterns::BOOLEAN) ⇒ Object
44
45
46
|
# File 'lib/text_extractor.rb', line 44
# Declares a value named +id+ whose captured text is converted to a boolean.
#
# The conversion yields +false+ when the captured text matches
# +Patterns::FALSE+ and +true+ for anything else.
#
# @param id [Symbol] name of the value
# @param re [Regexp] pattern used to capture the text
# @return [Object] whatever #value returns
def boolean(id, re = Patterns::BOOLEAN)
  not_false = proc { |text| text.match(Patterns::FALSE).nil? }
  value(id, re, &not_false)
end
|
#filldown(**kwargs, &block) ⇒ Object
83
84
85
86
|
# File 'lib/text_extractor.rb', line 83
# Declares a record built from the +Filldown+ class instead of the default
# record class. All keyword arguments and the block are forwarded to
# #record unchanged.
#
# @param kwargs [Hash] options forwarded to #record
# @yield the record body, forwarded to #record
# @return [Object] whatever #record returns
# @raise [RuntimeError] when no block is supplied
def filldown(**kwargs, &block)
  raise "#{self.class}.filldown requires a block" if block.nil?

  record(Filldown, **kwargs, &block)
end
|
#find_record_for(match) ⇒ Object
88
89
90
|
# File 'lib/text_extractor.rb', line 88
# Finds the record whose marker group took part in +match+.
#
# Record number +i+ is looked up under the group name "__i" (the same names
# #regexps wraps around each record). The first record whose group has a
# truthy value in +match+ is returned.
#
# @param match [#[]] object answering +match["__0"]+, +match["__1"]+, ...
# @return [Object, nil] the matching record, or +nil+ when no marker group
#   is set (previously this case raised a TypeError from indexing with nil)
def find_record_for(match)
  records.find.with_index { |_record, i| match["__#{i}"] }
end
|
#float(id, re = Patterns::FLOAT) ⇒ Object
52
53
54
|
# File 'lib/text_extractor.rb', line 52
# Declares a value named +id+ whose captured text is converted with
# Kernel#Float.
#
# @param id [Symbol] name of the value
# @param re [Regexp] pattern used to capture the text
# @return [Object] whatever #value returns
def float(id, re = Patterns::FLOAT)
  to_float = proc { |text| Float(text) }
  value(id, re, &to_float)
end
|
#integer(id, re = Patterns::INTEGER) ⇒ Object
48
49
50
|
# File 'lib/text_extractor.rb', line 48
# Declares a value named +id+ whose captured text is converted with
# Kernel#Integer.
#
# Kernel#Integer honours radix prefixes, so captured text such as "0x1A"
# is read as hexadecimal and "010" as octal.
# NOTE(review): confirm the capture pattern cannot produce leading zeros
# if decimal interpretation is expected.
#
# @param id [Symbol] name of the value
# @param re [Regexp] pattern used to capture the text
# @return [Object] whatever #value returns
def integer(id, re = Patterns::INTEGER)
  to_integer = proc { |text| Integer(text) }
  value(id, re, &to_integer)
end
|
#ipaddr(id, re = Patterns::IPADDR) ⇒ Object
60
61
62
|
# File 'lib/text_extractor.rb', line 60
# Declares a value named +id+ whose captured text is converted to an
# +IPAddr+ instance.
#
# @param id [Symbol] name of the value
# @param re [Regexp] pattern used to capture the text
# @return [Object] whatever #value returns
def ipaddr(id, re = Patterns::IPADDR)
  to_address = proc { |text| IPAddr.new(text) }
  value(id, re, &to_address)
end
|
#ipnetaddr(id, re = Patterns::IPNETADDR) ⇒ Object
64
65
66
|
# File 'lib/text_extractor.rb', line 64
# Declares a value named +id+ whose captured text is converted to an
# +IPAddr+ instance; differs from #ipaddr only in its default pattern.
#
# @param id [Symbol] name of the value
# @param re [Regexp] pattern used to capture the text
# @return [Object] whatever #value returns
def ipnetaddr(id, re = Patterns::IPNETADDR)
  to_network = proc { |text| IPAddr.new(text) }
  value(id, re, &to_network)
end
|
#rational(id, re = Patterns::RATIONAL) ⇒ Object
56
57
58
|
# File 'lib/text_extractor.rb', line 56
# Declares a value named +id+ whose captured text is converted with
# Kernel#Rational.
#
# @param id [Symbol] name of the value
# @param re [Regexp] pattern used to capture the text
# @return [Object] whatever #value returns
def rational(id, re = Patterns::RATIONAL)
  to_rational = proc { |text| Rational(text) }
  value(id, re, &to_rational)
end
|
#record(klass = Record, **kwargs, &block) ⇒ Object
75
76
77
78
79
80
81
|
# File 'lib/text_extractor.rb', line 75
# Declares a record: evaluates the block against this extractor to obtain
# the record's regexp, then instantiates +klass+ with it and appends the
# instance to @records.
#
# @current_record_values is reset before the block runs and handed to the
# new record as the +values:+ option afterwards (the singleton methods
# defined by #value append to it while the block executes). A caller-supplied
# +values:+ keyword is overridden.
#
# @param klass [Class] class to instantiate for this record
# @param kwargs [Hash] extra options passed to +klass.new+
# @yield must return the Regexp describing the record
# @return [Array] @records, after the new record has been appended
# @raise [RuntimeError] when no block is supplied
def record(klass = Record, **kwargs, &block)
  raise "#{self.class}.record requires a block" if block.nil?

  @current_record_values = []
  pattern = strip_record(instance_exec(&block))
  # Read the ivar only after the block ran: the block may have replaced it.
  options = kwargs.merge(values: @current_record_values)
  @records << klass.new(pattern, **options)
end
|
#regexps ⇒ Object
96
97
98
99
100
|
# File 'lib/text_extractor.rb', line 96
# Builds one regexp per record, each wrapping the record's source in a
# named group "__<index>" so a match can be traced back to its record.
# The record's own options are carried over.
#
# @return [Array<Regexp>] wrapped regexps, in record order
def regexps
  @records.each_with_index.map do |rec, index|
    wrapped = "(?<__#{index}>#{rec.source})"
    Regexp.new(wrapped, rec.options)
  end
end
|
#scan(input) ⇒ Object
92
93
94
|
# File 'lib/text_extractor.rb', line 92
# Scans +input+ using this extractor definition.
#
# NOTE(review): the body line below is incomplete in this rendering — the
# receiver before `.new` and the method name after the final `.` are
# missing. Presumably the receiver is the Extraction class listed under
# this namespace; restore the line from lib/text_extractor.rb (line 93)
# before relying on it.
#
# @param input [Object] the text to scan — TODO confirm expected type
def scan(input)
.new(input, self).scan.
end
|
#strip_record(regexp) ⇒ Object
68
69
70
71
72
73
|
# File 'lib/text_extractor.rb', line 68
# Removes the common indentation from a multi-line record regexp.
#
# When the last line of the regexp source consists only of whitespace, that
# line is taken as the indentation and removed from the *start* of every
# line. Interior occurrences of the same whitespace run are left alone so
# that significant whitespace inside a pattern is preserved. The joined
# result is then stripped of leading and trailing whitespace.
#
# @param regexp [Regexp] the regexp as written in the record block
# @return [Regexp] a new regexp with the same options and de-indented source
def strip_record(regexp)
  lines = regexp.source.lines
  indent = lines.last # nil when the source is empty
  if indent && indent =~ /\A\s*\z/
    lines.map! do |line|
      line.start_with?(indent) ? line[indent.length..-1] : line
    end
  end
  Regexp.new(lines.join.strip, regexp.options)
end
|
#to_re ⇒ Object
102
103
104
|
# File 'lib/text_extractor.rb', line 102
# Combines every per-record regexp from #regexps into a single alternation.
#
# @return [Regexp] the union of all record regexps
def to_re
  alternatives = regexps
  Regexp.union(alternatives)
end
|
#value(id, re, &block) ⇒ Object
36
37
38
39
40
41
42
|
# File 'lib/text_extractor.rb', line 36
# Declares a named value and defines a singleton method with the same name
# for use inside record blocks.
#
# The +Value+ is stored in @values under +id+. Calling the generated method
# appends that +Value+ to @current_record_values and returns the source of
# a named capture group wrapping +re+, ready for interpolation into a
# record regexp.
#
# @param id [Symbol] name of the value and of the generated method
# @param re [Regexp] pattern whose source is wrapped in the named group
# @yield optional conversion block handed to +Value.new+
# @return [Symbol] the result of +define_singleton_method+
def value(id, re, &block)
  definition = Value.new(id, re, &block)
  @values[id] = definition
  define_singleton_method(id) do
    @current_record_values << definition
    "(?<#{id}>#{re.source})"
  end
end
|