Class: TrajectPlus::Extraction::TransformPipeline

Inherits:
Object
  • Object
show all
Defined in:
lib/traject_plus/extraction.rb

Overview

Pipeline for transforming extracted values into normalized values

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options) ⇒ TransformPipeline

Returns a new instance of TransformPipeline.



14
15
16
# File 'lib/traject_plus/extraction.rb', line 14

# Stores the pipeline configuration on the new instance.
#
# @param options [Object] kept as-is in @options; it is neither validated nor
#   copied here
def initialize(options)
  @options = options
end

Instance Attribute Details

#options ⇒ Object (readonly)

Returns the value of attribute options.



12
13
14
# File 'lib/traject_plus/extraction.rb', line 12

# Reader for the pipeline configuration held in @options.
#
# @return [Object] the current value of @options
def options
  @options
end

Instance Method Details

#default(values, default_value) ⇒ Object

to_field 'x', default: 'y' # nil => 'y'



108
109
110
111
112
113
114
# File 'lib/traject_plus/extraction.rb', line 108

# Falls back to a default when nothing usable was extracted.
#
# to_field 'x', default: 'y' # nil => 'y'
#
# @param values [Object] the extracted value(s); must respond to `present?`
#   (presumably ActiveSupport's Object#present? — confirm it is loaded)
# @param default_value [Object] returned unchanged when `values` is not present
# @return [Object] `values` when present, otherwise `default_value`
def default(values, default_value)
  return values if values.present?

  default_value
end

#format(values, insert_string) ⇒ Object

to_field 'x', format: '-> %s <-' # 'abc' to '-> abc <-'



66
67
68
69
70
# File 'lib/traject_plus/extraction.rb', line 66

# Interpolates each value into a format string using the `%` operator.
#
# to_field 'x', format: '-> %s <-' # 'abc' to '-> abc <-'
#
# @param values [#flat_map] the values to format
# @param insert_string [#%] format template applied to every value
# @return [Array] one formatted result per value
def format(values, insert_string)
  values.flat_map { |value| insert_string % value }
end

#match(values, match, index) ⇒ Object

to_field 'x', match: [/([aeiou])/, 1] # 'abc' => 'a'



57
58
59
60
61
62
63
# File 'lib/traject_plus/extraction.rb', line 57

# Extracts one capture group from every value.
#
# to_field 'x', match: [/([aeiou])/, 1] # 'abc' => 'a'
#
# @param values [#flat_map] values to test; each must respond to `match`
# @param match [Regexp, String] the pattern handed to each value's `match`
# @param index [Integer, String, Symbol] the capture to read from the match data
# @return [Array] the selected capture for each value; with String values a
#   non-matching value contributes nil, because String#match returns nil
#   without running the block
def match(values, match, index)
  values.flat_map do |value|
    value.match(match) { |found| found[index] }
  end
end

#max(values, count, block = nil) ⇒ Object

to_field 'x', max: 1 # ['a', 'b'] => ['b']



92
93
94
95
96
97
98
# File 'lib/traject_plus/extraction.rb', line 92

# Keeps the `count` largest values.
#
# to_field 'x', max: 1 # ['a', 'b'] => ['b']
#
# The previous implementation had its branches swapped: a supplied comparator
# was dropped and only the nil case reached `&block`, so custom ordering never
# took effect.
#
# @param values [#max] the collection to pick from
# @param count [Integer] how many values to keep
# @param block [#to_proc, nil] optional two-argument comparator (`<=>` style);
#   the natural ordering is used when nil
# @return [Array] up to `count` values, largest first
def max(values, count, block = nil)
  # `&nil` passes no block at all, so the default case needs no separate branch.
  values.max(count, &block)
end

#min(values, count, block = nil) ⇒ Object

to_field 'x', min: 1 # ['a', 'b'] => ['a']



83
84
85
86
87
88
89
# File 'lib/traject_plus/extraction.rb', line 83

# Keeps the `count` smallest values.
#
# to_field 'x', min: 1 # ['a', 'b'] => ['a']
#
# The previous implementation had its branches swapped: a supplied comparator
# was dropped and only the nil case reached `&block`, so custom ordering never
# took effect.
#
# @param values [#min] the collection to pick from
# @param count [Integer] how many values to keep
# @param block [#to_proc, nil] optional two-argument comparator (`<=>` style);
#   the natural ordering is used when nil
# @return [Array] up to `count` values, smallest first
def min(values, count, block = nil)
  # `&nil` passes no block at all, so the default case needs no separate branch.
  values.min(count, &block)
end

#reject(values, block) ⇒ Object

to_field 'x', reject: lambda { |x| x =~ /a/ } # ['a', 'b'] => ['b']



78
79
80
# File 'lib/traject_plus/extraction.rb', line 78

# Filters values by delegating to `values.reject` with the given predicate.
#
# to_field 'x', reject: lambda { |x| x =~ /a/ } # ['a', 'b'] => ['b']
#
# @param values [#reject] the collection to filter
# @param block [#to_proc] predicate, converted to a block with `&`
# @return [Object] whatever `values.reject` returns
def reject(values, block)
  values.reject(&block)
end

#select(values, block) ⇒ Object

to_field 'x', select: lambda { |x| x =~ /a/ } # ['a', 'b'] => ['a']



73
74
75
# File 'lib/traject_plus/extraction.rb', line 73

# Filters values by delegating to `values.select` with the given predicate.
#
# to_field 'x', select: lambda { |x| x =~ /a/ } # ['a', 'b'] => ['a']
#
# @param values [#select] the collection to filter
# @param block [#to_proc] predicate, converted to a block with `&`
# @return [Object] whatever `values.select` returns
def select(values, block)
  values.select(&block)
end

#transform(values) ⇒ Object



18
19
20
21
22
23
24
25
26
# File 'lib/traject_plus/extraction.rb', line 18

# Runs `values` through every configured step, in the order `options` yields them.
#
# Each entry of `options` is a (step, params) pair. A step that responds to
# `call` is invoked once per value as `step.call(value, params)` and the
# results are flattened by one level. Any other step is treated as a method
# name and dispatched on this object as `public_send(step, current, params)`.
#
# NOTE(review): `params` is always forwarded as a single argument, so a step
# method that declares more than one parameter after `values` (for example
# `match(values, match, index)`) cannot be satisfied by an Array value here —
# confirm whether the params were meant to be splatted.
#
# @param values [Object] the initial extracted values
# @return [Object] the output of the last step, or `values` when there are no steps
def transform(values)
  current = values

  options.each do |step, params|
    current =
      if step.respond_to?(:call)
        current.flat_map { |value| step.call(value, params) }
      else
        public_send(step, current, params)
      end
  end

  current
end

#translation_map(values, maps) ⇒ Object

Using a named Traject translation map: to_field 'x', translation_map: 'types' # 'x' => 'mapped x'



102
103
104
105
# File 'lib/traject_plus/extraction.rb', line 102

# Translates values through a Traject translation map.
#
# Using a named Traject translation map:
#   to_field 'x', translation_map: 'types' # 'x' => 'mapped x'
#
# @param values [Object] value or values to translate; wrapped with `Array()`
# @param maps [Object] constructor argument(s) for Traject::TranslationMap;
#   wrapped with `Array()` and splatted into `new`
# @return [Object] the result of `Traject::TranslationMap#translate_array`
def translation_map(values, maps)
  Traject::TranslationMap.new(*Array(maps)).translate_array(Array(values))
end