Class: TextExtractor

Inherits:
Object
  • Object
show all
Defined in:
lib/text_extractor.rb,
lib/text_extractor/value.rb,
lib/text_extractor/record.rb,
lib/text_extractor/version.rb,
lib/text_extractor/filldown.rb,
lib/text_extractor/extraction.rb

Overview

Represents an extractor definition.

Defined Under Namespace

Modules: Patterns
Classes: Extraction, Filldown, Record, Value

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(&block) ⇒ TextExtractor

Returns a new instance of TextExtractor.



10
11
12
13
14
15
16
17
18
19
# File 'lib/text_extractor.rb', line 10

# Builds an extractor by evaluating the given definition block against the
# new instance.
#
# @yield the definition block; it is run with +instance_exec+, so +self+
#   inside the block is the extractor being built
# @raise [RuntimeError] if no block is given
def initialize(&block)
  raise "#{self.class}.new requires a block" unless block

  # All state is set up before the block runs, because the block is
  # evaluated against this instance.
  @values = {}
  @fill = {}
  @records = []
  @filldowns = []
  @current_record_values = []
  instance_exec(&block)
end

Instance Attribute Details

#records ⇒ Object (readonly)

Returns the value of attribute records.



8
9
10
# File 'lib/text_extractor.rb', line 8

# Reader for the +@records+ instance variable, the array that #record
# appends to.
#
# @return [Array] the record definitions added so far
def records
  @records
end

#values ⇒ Object (readonly)

Returns the value of attribute values.



8
9
10
# File 'lib/text_extractor.rb', line 8

# Reader for the +@values+ instance variable, the hash that #value fills
# with one entry per declared id.
#
# @return [Hash] the value definitions keyed by id
def values
  @values
end

Class Method Details

.version ⇒ Object



2
3
4
# File 'lib/text_extractor/version.rb', line 2

# Reports the library version.
#
# @return [String] the version string
def self.version
  '0.0.2'
end

Instance Method Details

#boolean(id, re = Patterns::BOOLEAN) ⇒ Object



44
45
46
# File 'lib/text_extractor.rb', line 44

# Declares a boolean value named +id+.
#
# The converter handed to #value yields +false+ when the text matches
# Patterns::FALSE and +true+ otherwise.
#
# @param id the name passed through to #value
# @param re the pattern passed through to #value
# @return whatever #value returns
def boolean(id, re = Patterns::BOOLEAN)
  value(id, re) do |text|
    text.match(Patterns::FALSE).nil?
  end
end

#filldown(**kwargs, &block) ⇒ Object



83
84
85
86
# File 'lib/text_extractor.rb', line 83

# Declares a record built with the Filldown class instead of the default
# record class; everything else is delegated to #record.
#
# @param kwargs keyword options forwarded to #record
# @yield the record definition block, forwarded to #record
# @raise [RuntimeError] if no block is given
# @return whatever #record returns
def filldown(**kwargs, &block)
  raise "#{self.class}.filldown requires a block" if block.nil?

  record(Filldown, **kwargs, &block)
end

#find_record_for(match) ⇒ Object



88
89
90
# File 'lib/text_extractor.rb', line 88

# Looks up the record whose marker group took part in +match+.
#
# Marker groups are named "__<index>", where the index is the record's
# position in #records (see #regexps). The first index whose group is
# truthy in +match+ selects the record.
#
# @param match an object indexable by group name (e.g. a MatchData)
# @return the record at the first matching index
# @raise [TypeError] if no marker group is set, since the lookup index is nil
def find_record_for(match)
  index = records.each_index.find_index { |i| match["__#{i}"] }
  records[index]
end

#float(id, re = Patterns::FLOAT) ⇒ Object



52
53
54
# File 'lib/text_extractor.rb', line 52

# Declares a value named +id+ whose text is converted with Kernel#Float.
#
# @param id the name passed through to #value
# @param re the pattern passed through to #value
# @return whatever #value returns
def float(id, re = Patterns::FLOAT)
  value(id, re) do |text|
    Float(text)
  end
end

#integer(id, re = Patterns::INTEGER) ⇒ Object



48
49
50
# File 'lib/text_extractor.rb', line 48

# Declares a value named +id+ whose text is converted with Kernel#Integer.
#
# NOTE(review): Kernel#Integer honours radix prefixes, so "010" converts to
# 8 and "0x10" to 16. Confirm that Patterns::INTEGER cannot capture such
# text, or that this is the intended result.
#
# @param id the name passed through to #value
# @param re the pattern passed through to #value
# @return whatever #value returns
def integer(id, re = Patterns::INTEGER)
  value(id, re) do |text|
    Integer(text)
  end
end

#ipaddr(id, re = Patterns::IPADDR) ⇒ Object



60
61
62
# File 'lib/text_extractor.rb', line 60

# Declares a value named +id+ whose text is converted to an IPAddr.
#
# @param id the name passed through to #value
# @param re the pattern passed through to #value
# @return whatever #value returns
def ipaddr(id, re = Patterns::IPADDR)
  value(id, re) do |text|
    IPAddr.new(text)
  end
end

#ipnetaddr(id, re = Patterns::IPNETADDR) ⇒ Object



64
65
66
# File 'lib/text_extractor.rb', line 64

# Declares a value named +id+ whose text is converted to an IPAddr. The
# conversion is the same as #ipaddr; only the default pattern differs.
#
# @param id the name passed through to #value
# @param re the pattern passed through to #value
# @return whatever #value returns
def ipnetaddr(id, re = Patterns::IPNETADDR)
  value(id, re) do |text|
    IPAddr.new(text)
  end
end

#rational(id, re = Patterns::RATIONAL) ⇒ Object



56
57
58
# File 'lib/text_extractor.rb', line 56

# Declares a value named +id+ whose text is converted with Kernel#Rational.
#
# @param id the name passed through to #value
# @param re the pattern passed through to #value
# @return whatever #value returns
def rational(id, re = Patterns::RATIONAL)
  value(id, re) do |text|
    Rational(text)
  end
end

#record(klass = Record, **kwargs, &block) ⇒ Object



75
76
77
78
79
80
81
# File 'lib/text_extractor.rb', line 75

# Declares a record: evaluates the block against this extractor to obtain
# a regexp, normalises it with #strip_record, and appends a new +klass+
# instance to +@records+.
#
# @param klass the class to instantiate; it receives the regexp plus the
#   keyword options
# @param kwargs keyword options forwarded to +klass.new+; +:values+ is
#   always overwritten with the values collected while the block ran
# @yield the record definition block, expected to return a Regexp
# @raise [RuntimeError] if no block is given
# @return [Array] the +@records+ array after the append
def record(klass = Record, **kwargs, &block)
  raise "#{self.class}.record requires a block" if block.nil?

  # Fresh collector: the helper methods defined by #value append to it
  # when the block calls them.
  @current_record_values = []
  pattern = strip_record(instance_exec(&block))
  options = kwargs.merge(values: @current_record_values)
  @records.push(klass.new(pattern, **options))
end

#regexps ⇒ Object



96
97
98
99
100
# File 'lib/text_extractor.rb', line 96

# Wraps each record's pattern in a named marker group "__<index>", where
# the index is the record's position in +@records+. The group name is what
# #find_record_for uses to map a match back to its record.
#
# @return [Array<Regexp>] one wrapped regexp per record, keeping each
#   record's own options
def regexps
  @records.each_with_index.map do |rec, index|
    wrapped = "(?<__#{index}>#{rec.source})"
    Regexp.new(wrapped, rec.options)
  end
end

#scan(input) ⇒ Object



92
93
94
# File 'lib/text_extractor.rb', line 92

# Runs this extractor over +input+ by building an Extraction for the input
# and this extractor, scanning it, and returning its matches.
#
# @param input the text handed to Extraction.new
# @return the +extraction_matches+ of the scanned Extraction
def scan(input)
  extraction = Extraction.new(input, self)
  extraction.scan.extraction_matches
end

#strip_record(regexp) ⇒ Object



68
69
70
71
72
73
# File 'lib/text_extractor.rb', line 68

# Normalises a record regexp written across several indented lines.
#
# When the last line of the source is whitespace only (typically the
# indentation in front of the closing delimiter), that exact string is
# treated as the common indentation and removed from the START of every
# line. The joined result is then stripped of leading and trailing
# whitespace.
#
# The removal is anchored to the line start on purpose: whitespace inside
# a pattern is significant unless the regexp is in extended mode, so
# occurrences of the indentation string elsewhere in a line, and any extra
# indentation beyond the common prefix, are left untouched.
#
# @param regexp [Regexp] the pattern returned by a record block
# @return [Regexp] a new regexp with the same options
def strip_record(regexp)
  lines = regexp.source.lines
  indent = lines.last # nil when the source is empty
  if indent && indent =~ /\A\s*\z/
    leading = /\A#{Regexp.escape(indent)}/
    lines = lines.map { |line| line.sub(leading, "") }
  end
  Regexp.new(lines.join.strip, regexp.options)
end

#to_re ⇒ Object



102
103
104
# File 'lib/text_extractor.rb', line 102

# Combines every regexp from #regexps into a single alternation.
#
# @return [Regexp] the union of all wrapped record regexps
def to_re
  alternatives = regexps
  Regexp.union(alternatives)
end

#value(id, re, &block) ⇒ Object



36
37
38
39
40
41
42
# File 'lib/text_extractor.rb', line 36

# Declares a value named +id+ and defines a helper method of the same name
# on this extractor.
#
# The Value built from the arguments is stored in +@values+ under +id+.
# Each call to the helper appends that Value to +@current_record_values+
# and returns the source of a named capture group wrapping +re+.
#
# @param id the value name; it is also the helper name and the group name
# @param re an object responding to +source+ (e.g. a Regexp)
# @yield optional block forwarded to Value.new
# @return the result of +define_singleton_method+
def value(id, re, &block)
  definition = Value.new(id, re, &block)
  @values[id] = definition
  define_singleton_method(id) do
    @current_record_values.push(definition)
    "(?<#{id}>#{re.source})"
  end
end