Module: TSV

Extended by:
Annotation
Defined in:
lib/scout/tsv.rb,
lib/scout/tsv/csv.rb,
lib/scout/tsv/open.rb,
lib/scout/tsv/util.rb,
lib/scout/tsv/index.rb,
lib/scout/tsv/attach.rb,
lib/scout/tsv/dumper.rb,
lib/scout/tsv/parser.rb,
lib/scout/tsv/stream.rb,
lib/scout/tsv/traverse.rb,
lib/scout/tsv/change_id.rb,
lib/scout/tsv/util/melt.rb,
lib/scout/tsv/util/sort.rb,
lib/scout/tsv/util/unzip.rb,
lib/scout/tsv/transformer.rb,
lib/scout/tsv/util/filter.rb,
lib/scout/tsv/util/select.rb,
lib/scout/association/item.rb,
lib/scout/tsv/util/process.rb,
lib/scout/tsv/util/reorder.rb,
lib/scout/tsv/change_id/translate.rb

Defined Under Namespace

Classes: Dumper, Parser, Transformer

Constant Summary collapse

# Names of the keyword parameters declared by parse_line and parse_stream:
# only parameter entries whose kind is :key are kept, in declaration order.
KEY_PARAMETERS =
begin
  params = (method(:parse_line).parameters + method(:parse_stream).parameters)
    .select { |kind, _name| kind == :key }
    .map { |_kind, name| name }
  params
end

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Annotation

list_tsv_values, load_info, load_tsv, load_tsv_values, obj_tsv_values, resolve_tsv_array, tsv

Class Method Details

.acceptable_parser_options(func = nil) ⇒ Object



3
4
5
6
7
8
9
10
11
# File 'lib/scout/tsv/parser.rb', line 3

# List the parameter names accepted by the TSV parsing entry points.
#
# With no argument, the names of parse_line, parse_stream and parse are
# combined (minus :line and :block); with +func+, only that method of TSV is
# inspected. Duplicates are removed in both cases.
def self.acceptable_parser_options(func = nil)
  param_names = ->(name) { TSV.method(name).parameters.map(&:last) }
  return param_names.call(func).uniq unless func.nil?

  combined = %i[parse_line parse_stream parse].flat_map { |name| param_names.call(name) }
  (combined - %i[line block]).uniq
end

.all_fields(file) ⇒ Object



155
156
157
158
159
160
161
# File 'lib/scout/tsv/util.rb', line 155

# Return the full field list (key field included) for +file+.
#
# Objects that answer to #all_fields are asked directly; anything else is
# handed to TSV.parse_header and the "all_fields" entry of the header is used.
def self.all_fields(file)
  return file.all_fields if file.respond_to?(:all_fields)

  header = TSV.parse_header(file)
  header["all_fields"]
end

.attach(source, other, target: nil, fields: nil, index: nil, identifiers: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil) ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
# File 'lib/scout/tsv/attach.rb', line 45

# Attach fields from +other+ to the entries of +source+, matching entries
# through a common field and, when needed, a translation index.
#
# source::        a TSV or TSV::Parser; anything else is wrapped in a TSV::Transformer.
# other::         a TSV or TSV::Parser; anything else is wrapped in a TSV::Parser.
#                 If it is not a TSV it is opened into one keyed by the join field.
# target::        only used when +source+ is a Transformer: :stream dumps to a
#                 TSV::Dumper, nil collects into a fresh TSV, any other value is
#                 used as the dumper itself.
# fields::        field name(s) of +other+ to attach; nil means all fields of
#                 +other+ except the join field and the source key field.
# index::         translation index for the join keys; built with
#                 TSV.translation_index when nil and +source+ cannot identify
#                 the join field of +other+.
# identifiers::   extra identifier file(s) consulted first when building the index.
# match_key::     join field in +source+ (resolved by TSV.match_keys).
# other_key::     join field in +other+ (resolved by TSV.match_keys).
# one2one::       forwarded to +other.reorder+ when +other+ must be re-keyed.
# complete::      when true and the join is on the source key, entries of
#                 +other+ absent from +source+ are added as new rows.
# insitu::        modify row values in place; defaults to true for a TSV source,
#                 false otherwise, and is forced to false for :single sources.
# persist_input:: forwarded as +persist:+ when opening +other+.
# bar::           progress bar option for the traversal; +true+ uses the log message.
#
# Returns +source+ (the Transformer when one was created).
def self.attach(source, other, target: nil, fields: nil, index: nil, identifiers: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil)
  source = TSV::Transformer.new source unless TSV === source || TSV::Parser === source
  other = TSV::Parser.new other unless TSV === other || TSV::Parser === other

  fields = [fields] if String === fields

  match_key, other_key = TSV.match_keys(source, other, match_key: match_key, other_key: other_key)

  if ! (TSV === other)
    other_key_name = other_key == :key ? other.key_field : other.fields[other_key]
    other = TSV.open other, key_field: other_key_name, fields: fields, one2one: true, persist: persist_input
    other_key = :key if other.key_field == source.key_field
  end

  if TSV::Transformer === source
    source.dumper = case target
                    when :stream
                      TSV::Dumper.new(source.options.merge(sep: "\t"))
                    when nil
                      TSV.setup({}, **source.options.dup)
                    else
                      target
                    end
  end

  other.with_unnamed do
    source.with_unnamed do

      other_key_name = other_key == :key ? other.key_field : other_key
      other_key_name = other.fields[other_key_name] if Integer === other_key
      fields = other.all_fields - [other_key_name, source.key_field] if fields.nil?

      # Name of the join field on the source side. The original expression
      # referenced match_key_name itself here, which always produced nil for
      # non-key joins and broke the translation index lookup below.
      match_key_name = match_key == :key ? source.key_field : match_key

      # Build a translation index only when the caller gave none and the
      # source does not itself contain the join field of +other+.
      if index.nil? && ! source.identify_field(other_key_name)
        identifier_files = []
        identifier_files << identifiers if identifiers
        identifier_files << source
        identifier_files << TSV.identifier_files(source)
        identifier_files << TSV.identifier_files(other)
        identifier_files << other

        index = TSV.translation_index(identifier_files.flatten, match_key_name, other_key_name)
      end

      # Re-key +other+ by its join field so rows can be fetched with other[key].
      if other_key != :key 
        other = other.reorder other_key, fields, one2one: one2one, merge: true, type: :double
      end

      # identify_field may rewrite the array it is given, hence the dup.
      other_field_positions = other.identify_field(fields.dup) 
      fields.zip(other_field_positions) do |o,n|
        raise "Field #{o} not found. Options: #{Log.fingerprint other.fields}" if n.nil? 
      end

      log_message = "Attach #{Log.fingerprint fields - source.fields} to #{Log.fingerprint source} (#{[match_key, other_key] * "=~"})"
      Log.debug log_message
      bar = log_message if TrueClass === bar

      source.fields = (source.fields + fields).uniq

      # Position in the (extended) source row where each attached field goes.
      overlaps = source.identify_field(fields)

      # :single sources are promoted to :list since they gain extra columns.
      type = source.type == :single ? :list : source.type

      # Placeholder row used when +other+ has no entry for a key.
      empty_other_values = case type
                           when :list
                             [nil] * other.fields.length
                           when :flat
                             []
                           when :double
                             [[]] * other.fields.length
                           end

      empty_other_values = nil if other.type == :single

      insitu = TSV === source ? true : false if insitu.nil?
      insitu = false if source.type == :single

      match_key_pos = source.identify_field(match_key)
      source.traverse bar: bar, unnamed: true do |orig_key,current_values|
        current_values = [current_values] if source.type == :single

        keys = (match_key == :key || match_key_pos == :key) ? [orig_key] : current_values[match_key_pos]
        keys = [keys].compact unless Array === keys

        # Translate the join keys into the identifier format of +other+.
        keys = index.chunked_values_at(keys).flatten if index

        current_values = current_values.dup unless insitu
        # A nil key still goes through the loop so the row is padded with empties.
        keys = [nil] if keys.empty?
        keys.each do |current_key|
          other_values = current_key.nil? ? empty_other_values : other[current_key]

          # Normalize the row of +other+ to the shape of the source type.
          if other_values.nil?
            other_values = empty_other_values
          elsif other.type == :flat 
            other_values = [other_values]
          elsif other.type == :list && source.type == :double
            other_values = other_values.collect{|v| [v] }
          elsif other.type == :double && source.type == :list
            other_values = other_values.collect{|v| v.first }
          end

          other_values = other_field_positions.collect do |pos|
            if pos == :key
              current_key
            else
              other.type == :single ? other_values : other_values[pos]
            end
          end

          other_values.zip(overlaps).each do |v,overlap|
            if type == :list
              # Only fill cells that are still nil or an empty string.
              current_values[overlap] = v if current_values[overlap].nil? || (String === current_values[overlap] && current_values[overlap].empty?)
            elsif type == :flat
              next if v.nil?
              v = [v] unless Array === v
              current_values.concat v
            else
              current_values[overlap] ||= []
              next if v.nil?
              v = [v] unless Array === v
              # Append only values not already present in the cell.
              current_values[overlap].concat (v - current_values[overlap])
            end
          end
        end
        source[orig_key] = current_values unless insitu
        nil
      end

      # Optionally add the rows of +other+ whose key is missing from +source+.
      if complete && match_key == :key
        other.each do |other_key,other_values|
          next if source.include?(other_key)
          if other.type == :flat 
            other_values = [other_values]
          elsif other.type == :single 
            other_values = [other_values]
          elsif other.type == :list && type == :double
            other_values = other_values.collect{|v| [v] }
          elsif other.type == :double && type == :list
            other_values = other_values.collect{|v| v.first }
          end

          new_values = case type
                       when :list
                         [nil] * source.fields.length
                       when :flat
                         []
                       when :double
                         source.fields.length.times.collect{ [] }
                       end

          other_values.zip(overlaps).each do |v,overlap|
            next if v.nil?
            if overlap == :key
              # The attached field is the source key: take a single key value.
              # (The branches were inverted before, calling #first on scalars
              # and using whole arrays as keys.)
              other_key = Array === v ? v.first : v
            elsif type == :list
              new_values[overlap] = v if new_values[overlap].nil? || (String === new_values[overlap] && new_values[overlap].empty?)
            else
              v = [v] unless Array === v
              new_values[overlap].concat v
            end
          end
          source[other_key] = new_values
        end
      end
      source.type = type
    end
  end

  source
end

.cast_value(value, cast) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/scout/tsv/parser.rb', line 13

# Apply +cast+ to +value+.
#
# Arrays are cast element by element (recursively). A Proc +cast+ is called
# with the value as is; otherwise +cast+ is sent to the value as a method
# name, except that nil and the empty string yield nil.
def self.cast_value(value, cast)
  return value.map { |element| cast_value(element, cast) } if Array === value
  return cast.call(value) if Proc === cast
  return nil if value.nil? || value == ""

  value.send(cast)
end

.change_id(source, source_id, new_id, identifiers: nil, one2one: false, insitu: false) ⇒ Object



33
34
35
36
37
38
39
40
41
# File 'lib/scout/tsv/change_id.rb', line 33

# Replace the field +source_id+ of +source+ by +new_id+.
#
# The +new_id+ field is attached from +identifiers+ (defaulting to
# source.identifiers) and the result is sliced to the original field order
# with +source_id+ swapped for +new_id+. String sources are wrapped in a
# TSV::Parser first. +one2one+ is accepted but not used by this method.
def self.change_id(source, source_id, new_id, identifiers: nil, one2one: false, insitu: false)
  source = TSV::Parser.new(source) if String === source
  identifiers = source.identifiers if identifiers.nil?

  renamed_fields = source.fields.dup
  position = renamed_fields.index(source_id)
  renamed_fields[position] = new_id

  attached = source.attach(identifiers, fields: [new_id], insitu: insitu)
  attached.slice(renamed_fields)
end

.change_key(source, new_key_field, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_identifiers: nil) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/scout/tsv/change_id.rb', line 4

# Re-key +source+ by +new_key_field+.
#
# When identifiers are available and +source+ does not strictly contain
# +new_key_field+, the field is first attached from the identifiers (the last
# identifier file that knows the field when an Array is given) and the
# re-keying is delegated to the resulting object. Otherwise a
# TSV::Transformer traverses +source+ keyed by the new field; the old key is
# kept as first field when +keep+ is true. Returns the transformer itself
# when +stream+ is true, else the TSV built from it.
def self.change_key(source, new_key_field, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_identifiers: nil)
  source = TSV::Parser.new(source) if String === source
  if identifiers.nil? && source.respond_to?(:identifiers)
    identifiers = source.identifiers
  end

  needs_translation = identifiers && source.identify_field(new_key_field, strict: true).nil?
  if needs_translation
    if Array === identifiers
      identifiers = identifiers.select { |candidate| candidate.identify_field(new_key_field) }.last
    end
    attached = source.attach(identifiers, fields: [new_key_field], insitu: false, one2one: true, persist_input: persist_identifiers)
    return attached.change_key(new_key_field, keep: keep, stream: stream, one2one: one2one, merge: merge)
  end

  remaining_fields = source.fields.dup - [new_key_field]
  remaining_fields.unshift(source.key_field) if keep

  transformer = TSV::Transformer.new(source)
  transformer.key_field = new_key_field
  transformer.fields = remaining_fields
  transformer.traverse(key_field: new_key_field, fields: remaining_fields, one2one: one2one, unnamed: true) do |key, values|
    [key, values]
  end

  return transformer if stream

  transformer.tsv(merge: merge, one2one: one2one)
end

.collapse_stream(stream, *args, **kwargs, &block) ⇒ Object



221
222
223
224
225
226
227
# File 'lib/scout/tsv/open.rb', line 221

# Run +stream+ through Open.collapse_stream inside process_stream.
#
# Objects answering to #stream are unwrapped first. The block handed to
# process_stream receives the sink and a line, which is forwarded to
# Open.collapse_stream; the collapsed result is consumed into the sink.
# Extra positional/keyword arguments and the block are accepted but unused.
def self.collapse_stream(stream, *args, **kwargs, &block)
  stream = stream.stream if stream.respond_to?(:stream)

  process_stream(stream) do |sin, line|
    Open.consume_stream(Open.collapse_stream(stream, line: line), false, sin)
  end
end

.concat_streams(streams) ⇒ Object



219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# File 'lib/scout/tsv/stream.rb', line 219

def self.concat_streams(streams)

  streams = streams.collect do |stream|
    case stream
    when(defined? Step and Step)
      stream.stream
    when Path
      stream.open
    when TSV::Dumper
      stream.stream
    when TSV
      stream.dumper_stream
    else
      stream
    end
  end.compact

  done_streams = []
  Open.open_pipe do |sin|
    first_stream = streams.first
    while line = first_stream.gets
      sin.write line
      break unless line[0] == "#"
    end

    while streams.any?
      streams.each do |stream|
        line = stream.gets
        sin.write line unless line[0] == "#"
      end
      streams.delete_if{|stream| stream.eof? }
    end
  end
end

.csv(obj, options = {}) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/scout/tsv/csv.rb', line 4

# Load CSV content into a TSV.
#
# obj::     a Path, a String (remote location, filename, or raw CSV text) or
#           any other object accepted by CSV.new.
# options:: CSV options plus the following keys, which are removed before the
#           rest is forwarded to CSV:
#           :headers   (default true) first row holds key field and field names;
#                      when falsy rows are keyed "row-<index>".
#           :type      (default :list) TSV type of the result.
#           :cast      method name sent to every value.
#           :merge     read from the options (and forced to true when re-keying);
#                      not otherwise used by this method.
#           :key_field / :fields  re-key / select fields after loading; the
#                      data is loaded as :double for this and converted back
#                      to the requested type afterwards.
#
# Returns the resulting TSV.
def self.csv(obj, options = {}) 
  options = IndiferentHash.add_defaults options, :headers => true, :type => :list
  headers = options[:headers]

  noheaders = ! headers

  type = options.delete :type
  cast = options.delete :cast
  merge = options.delete :merge
  key_field = options.delete :key_field
  fields = options.delete :fields
  
  # Re-keying goes through a :double TSV; remember the requested type.
  if key_field || fields
    orig_type = type
    type = :double
    merge = true
  end

  # The header row is consumed manually below, so CSV must not interpret it.
  options[:headers] = false

  csv = case obj
        when Path
          CSV.read obj.find.open, **options
        when String
          if Open.remote?(obj)
            CSV.read Open.open(obj), **options
          elsif Path.is_filename?(obj)
            CSV.read obj, **options
          else
            CSV.new obj, **options
          end
        else
          CSV.new obj, **options
        end

  tsv = if noheaders
          TSV.setup({}, :key_field => nil, :fields => nil, :type => type)
        else
          key, *csv_fields = csv.shift
          TSV.setup({}, :key_field => key, :fields => csv_fields, :type => type)
        end

  csv.each_with_index do |row,i|
    if noheaders
      key, values = ["row-#{i}", row]
    else
      key, *values = row
    end
    
    if cast
      values = values.collect{|v| v.send cast }
    end

    case type
    when :double, :flat
      tsv.zip_new(key, values)
    when :single
      tsv[key] = values.first
    when :list
      tsv[key] = values
    end
  end

  if key_field || fields
    tsv = tsv.reorder(key_field, fields, :one2one => true, :merge => true)
    if tsv.type != orig_type
      # Convert back to the requested type. Any type without a conversion
      # keeps the reordered TSV instead of turning the result into nil
      # (the case previously had no else branch and a duplicated :list clause).
      tsv = case orig_type
            when :list
              tsv.to_list
            when :single
              tsv.to_single
            when :flat
              tsv.to_flat
            else
              tsv
            end
    end
  end

  tsv
end

.field_match_counts(file, values, options = {}) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/scout/tsv/util.rb', line 13

# Count, per field of +file+, how many of +values+ appear in that field.
#
# A persisted text listing of "value<TAB>field" pairs is generated from the
# TSV (key values are listed under the key field name); the counting itself
# is delegated to a shell pipeline of cat/sed/grep/sort/cut/uniq. The result
# is opened as a :single TSV cast with :to_i; if that fails the exception is
# logged and an empty TSV is returned instead.
def self.field_match_counts(file, values, options = {})
  options = IndiferentHash.add_defaults options, :persist_prefix => "Field_Matches"
  persist_options = IndiferentHash.pull_keys options, :persist

  source_name = (TSV === file) ? file.filename : file
  path = Persist.persist(source_name, :string, persist_options.merge(:no_load => true)) do
    tsv = (TSV === file) ? file : TSV.open(file, options)
    tsv.unnamed = true
    field_names = tsv.fields

    listing = ""
    tsv.through do |gene, names|
      names.zip(field_names).each do |list, format|
        list = [list] unless Array === list
        # delete_if edits the list in place, as the original did
        list.delete_if { |name| name.empty? }
        next if list.empty?
        listing << list.map { |name| [name, format].join("\t") }.join("\n") << "\n"
      end
      listing << [gene, tsv.key_field].join("\t") << "\n"
    end
    listing
  end

  path = path.find if Path === path
  TmpFile.with_file(values.uniq.join("\n"), false) do |value_file|
    cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' | sed 's/HEADERNOMATCH//' |sort -u|cut -f 2  |sort|uniq -c|sed 's/^ *//;s/ /\t/'"
    begin
      TSV.open(CMD.cmd(cmd), :key_field => 1, :fields => [0], :type => :single, :cast => :to_i)
    rescue => error
      Log.exception error
      TSV.setup({}, :type => :single, :cast => :to_i)
    end
  end
end

.identifier_files(obj) ⇒ Object



255
256
257
258
259
260
261
262
263
264
265
266
267
268
# File 'lib/scout/tsv/attach.rb', line 255

# Locate the identifier files associated with +obj+.
#
# * A TSV answers through its own #identifier_files.
# * A filename (per Path.is_filename?) is turned into a Path; if an
#   `identifiers` entry exists next to it (dirname.identifiers) that entry is
#   returned, otherwise a one-element array holding the :identifiers option
#   declared in the file header.
# * Anything else yields nil.
def self.identifier_files(obj)
  if TSV === obj
    obj.identifier_files
  elsif Path.is_filename?(obj)
    # Always go through the Path version: a plain (or frozen) String does not
    # answer to the Path navigation used below.
    path = Path === obj ? obj : Path.setup(obj)
    sibling_identifiers = path.dirname.identifiers
    if sibling_identifiers.exists?
      sibling_identifiers
    else
      [TSV.parse_options(path)[:identifiers]]
    end
  else
    nil
  end
end

.identify_field(key_field, fields, name, strict: nil) ⇒ Object



47
48
49
50
51
# File 'lib/scout/tsv/util.rb', line 47

# Resolve +name+ to a field position, or :key when it designates the key field.
#
# key_field:: name of the key field.
# fields::    list of (non-key) field names.
# name::      field name, :key, or an Array of names (resolved element-wise).
# strict::    forwarded to NamedArray.identify_name; when truthy the key
#             field is only recognized through the literal :key.
#
# The +name+ argument is never modified: array input is mapped into a new
# array (it used to be rewritten in place, which altered the caller's array
# and failed on frozen arrays).
def self.identify_field(key_field, fields, name, strict: nil)
  return :key if name == :key || (! strict && NamedArray.field_match(key_field, name))
  name = name.collect{|n| NamedArray.field_match(key_field, n) ? :key : n } if Array === name
  NamedArray.identify_name(fields, name, strict: strict)
end

.identify_field_in_obj(obj, field) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
# File 'lib/scout/tsv/change_id/translate.rb', line 3

# Identify +field+ within +obj+, whatever kind of TSV-like object it is.
#
# * TSV: asked directly.
# * TSV::Parser / TSV::Dumper: resolved from their key_field and fields.
# * Path / String: the header is parsed and its "all_fields" list resolved.
# * Array: first element is taken as key field, the rest as fields.
# Any other object yields nil.
def self.identify_field_in_obj(obj, field)
  return obj.identify_field(field) if TSV === obj

  if TSV::Parser === obj || TSV::Dumper === obj
    return TSV.identify_field(obj.key_field, obj.fields, field)
  end

  if Path === obj || String === obj
    header = TSV.parse_header(obj)
    return identify_field_in_obj(header["all_fields"], field)
  end

  return nil unless Array === obj

  key_field, *fields = obj
  TSV.identify_field(key_field, fields, field)
end

.incidence(tsv, **kwargs) ⇒ Object



224
225
226
# File 'lib/scout/association/item.rb', line 224

# Build the incidence structure for the associations found in +tsv+: the keys
# of Association.index(tsv, **kwargs) are handed to AssociationItem.incidence.
def self.incidence(tsv, **kwargs)
  association_index = Association.index(tsv, **kwargs)
  AssociationItem.incidence(association_index.keys)
end

.index(tsv_file, target: :key, fields: nil, order: true, bar: nil, **kwargs) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/scout/tsv/index.rb', line 40

# Build a :single-type lookup from the values found in +fields+ of +tsv_file+
# to the corresponding value of the +target+ field.
#
# tsv_file:: source handed to Persist.persist and Open.traverse.
# target::   field whose values become the values of the index (default :key).
# fields::   fields whose values become the keys of the index; nil means :all.
# order::    when true, a value reachable through several fields resolves to
#            the target found through the earliest field position; when false
#            the first target seen during traversal wins.
# bar::      progress bar option; +true+ uses the log message as description.
# kwargs::   :engine, persist_* and data_* options are extracted here; the
#            remainder (with unnamed: true as default) goes to Open.traverse.
#
# Returns the index produced inside the Persist.persist block.
def self.index(tsv_file, target: :key, fields: nil, order: true, bar: nil, **kwargs)
  kwargs = IndiferentHash.add_defaults kwargs, unnamed: true
  engine = IndiferentHash.process_options kwargs, :engine

  fields = :all if fields.nil?

  # The persistence prefix encodes which fields are indexed towards which target.
  prefix = case fields
           when :all
             "Index[#{target}]"
           else
             "Index[#{Array === fields ? fields * "," : fields}->#{target}]"
           end

  prefix += select_prefix_str(kwargs[:select])

  persist_options = IndiferentHash.pull_keys kwargs, :persist
  persist_options = IndiferentHash.add_defaults persist_options, :prefix => prefix, :engine => :HDB, :persist => false

  data_options = IndiferentHash.pull_keys kwargs, :data

  Persist.persist(tsv_file, persist_options[:engine], persist_options.merge(other_options: kwargs.merge(target: target, fields: fields, order: order, data_options: data_options))) do |filename|
    # With a filename the index is backed by a ScoutCabinet store; otherwise
    # it is a plain in-memory TSV.
    if filename
      index = ScoutCabinet.open(filename, true, engine)
      TSV.setup(index, :type => :single)
      index.extend TSVAdapter 
    else
      index = TSV.setup({}, :type => :single)
    end

    log_msg = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}"
    Log.low log_msg
    bar = log_msg if TrueClass === bar

    if order
      # tmp_index[value][field_position] collects the targets seen for a value
      # in each field, so earlier field positions take precedence below.
      tmp_index = {}
      include_self = fields == :all || (Array === fields) && fields.include?(target)
      target_key_field, source_field_names = Open.traverse tsv_file, type: :double, key_field: target, fields: fields, bar: bar, **kwargs do |k,values|
        # Slot 0 is reserved for the target mapping to itself.
        tmp_index[k] ||= [[k]] if include_self
        values.each_with_index do |list,i|
          i += 1 if include_self
          list.each do |e|
            tmp_index[e] ||= []
            tmp_index[e][i] ||= []
            tmp_index[e][i] << k
          end
        end
      end
      # Keep only the first target across the position-ordered candidates.
      tmp_index.each do |e,list|
        index[e] = list.flatten.compact.uniq.first
      end

      # NOTE(review): these two assignments are repeated after the if/else below.
      index.key_field = source_field_names * ","
      index.fields = [target_key_field]

      # Drop the reference to the temporary structure.
      tmp_index = {}

    else
      target_key_field, source_field_names =  Open.traverse tsv_file, key_field: target, fields: fields, type: :flat, unnamed: true, bar: bar, **kwargs do |k,values|
        values.each do |e|
          # First target seen for a value wins.
          index[e] = k unless index.include?(e)
        end
      end
    end

    index.key_field = source_field_names * ","
    index.fields = [target_key_field]

    index
  end
end

.match_keys(source, other, match_key: nil, other_key: nil) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/scout/tsv/attach.rb', line 3

# Work out which field of +source+ (match_key) and which field of +other+
# (other_key) should be used to join the two.
#
# Explicit +match_key+ / +other_key+ arguments are used when given; missing
# ones are guessed through successive attempts (see the inline comments).
# Either element of the result is the symbol :key when it designates the key
# field of its object.
#
# Returns [match_key, other_key].
def self.match_keys(source, other, match_key: nil, other_key: nil)
  #match_key = (source.all_fields & other.all_fields).first if match_key.nil?
  # Attempt 1: take the first position returned by NamedArray.identify_name
  # for other's fields against source's fields, when there is one.
  if match_key.nil?
    match_key_pos = NamedArray.identify_name(source.all_fields, other.all_fields).first
    match_key = source.all_fields[match_key_pos] if match_key_pos
  end

  # Attempt 2: first field of source that +other+ can identify.
  # NOTE(review): other_key is reassigned on every iteration, so a
  # caller-supplied other_key is overwritten here (with nil if nothing matches).
  if match_key.nil?
    source.all_fields.collect do |f|
      other_key = other.identify_field(f)
      if other_key
        other_key = other.key_field if other_key == :key
        match_key = f
        break
      end
    end
  end

  # Attempt 3: first field of other that +source+ can identify.
  # NOTE(review): match_key receives the result of identify_field here (not
  # the field name), unlike the previous attempts — confirm this is intended.
  if match_key.nil?
    other.all_fields.collect do |f|
      match_key = source.identify_field(f)
      if match_key
        other_key = f
        break
      end
    end
  end

  # Fall back to the key fields of each side.
  match_key = source.key_field if match_key.nil? 

  if other_key.nil?
    other_key = other.identify_field(match_key)
  end

  other_key = other.key_field if other_key.nil?

  # Normalize references to the key fields into the symbol :key.
  match_key = :key if NamedArray.field_match(match_key, source.key_field)
  other_key = :key if NamedArray.field_match(other_key, other.key_field)

  [match_key, other_key]
end

.open(file, options = {}) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/scout/tsv.rb', line 75

# Open +file+ as a TSV, going through Persist.tsv.
#
# file::    a TSV::Parser, a Path, a filename, an IO-like object, or a String
#           with TSV content (a String containing a newline is wrapped in a
#           StringIO).
# options:: parsing options forwarded to TSV.parse, plus:
#           :grep, :invert_grep, :fixed_grep, :nocache  passed to Open.open;
#           :monitor          extracted from the options and not used further here;
#           :entity_options, :unnamed  applied to the resulting TSV;
#           :field            shorthand selecting a single field;
#           :update and persist_* keys  persistence options.
#
# Returns the value of the Persist.tsv call; the block itself produces the
# parsed TSV.
def self.open(file, options = {})
  grep, invert_grep, fixed_grep, nocache, monitor, entity_options, unnamed, field = IndiferentHash.process_options options, :grep, :invert_grep, :fixed_grep, :nocache, :monitor, :entity_options, :unnamed, :field

  # :field is a shorthand for a one-field TSV, :single unless a type was given.
  if field and options[:field].nil?
    options[:fields] = [field]
    options[:type] ||= :single
  end

  persist_options = IndiferentHash.pull_keys options, :persist
  persist_options = IndiferentHash.add_defaults persist_options, prefix: "TSV", type: :HDB, persist: false
  persist_options[:data] ||= options[:data]
  persist_options[:update] = options.delete(:update) if options.include?(:update)

  # A plain String containing a newline is treated as content, not as a filename.
  file = StringIO.new file if String === file && ! (Path === file) && file.index("\n")

  # Name used for persistence; a Parser brings its own options, which replace
  # the ones passed in.
  source_name, options = 
    case file
    when StringIO
      [file.inspect, options]
    when TSV::Parser
      [file.options[:filename], file.options]
    else
      [file, options]
    end

  Persist.tsv(source_name, options, persist_options: persist_options) do |data|
    # +data+, when provided, is the container the TSV is loaded into.
    options[:data] = data if data
    options[:filename] ||= if TSV::Parser === file
                           file.options[:filename]
                         elsif Path === file
                           file
                         elsif file.respond_to?(:filename)
                           file.filename
                         elsif Path.is_filename?(file)
                           file
                         else
                           nil
                         end

    if data
      Log.debug "TSV open #{Log.fingerprint file} into #{Log.fingerprint data}"
    else
      Log.debug "TSV open #{Log.fingerprint file}"
    end

    tsv = if TSV::Parser === file
            TSV.parse(file, **options)
          else
            options[:tsv_invert_grep] ||= invert_grep if invert_grep
            Open.open(file, grep: grep, invert_grep: invert_grep, fixed_grep: fixed_grep, nocache: nocache) do |f|
              TSV.parse(f, **options)
            end
          end

    # Default the identifiers to the identifier file of the Path, if it exists.
    if tsv.identifiers.nil? and Path === tsv.filename and tsv.filename.identifier_file_path
      tsv.identifiers = tsv.filename.identifier_file_path.find if tsv.filename.identifier_file_path.exists?
    end

    tsv.unnamed = unnamed unless unnamed.nil?

    tsv.entity_options = entity_options

    tsv
  end
end

.original_setup ⇒ Object



34
# File 'lib/scout/tsv.rb', line 34

# Keep the current `setup` reachable under the name `original_setup`
# (presumably so that `setup` can be redefined afterwards — TODO confirm in lib/scout/tsv.rb).
alias original_setup setup

.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: nil, serializer: nil, **kwargs, &block) ⇒ Object



470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
# File 'lib/scout/tsv/parser.rb', line 470

# Parse +stream+ into a TSV.
#
# stream::      a TSV::Parser, or anything TSV::Parser.new accepts (built with
#               +fix+, +header_hash+ and +sep+).
# filename::    stored on the result when it has no filename yet.
# namespace::   stored on the result when it has no namespace yet.
# unnamed::     assigned to the result's +unnamed+ attribute.
# serializer::  serializer for persistent +data+ containers; when not given
#               it is derived from the cast and type.
# kwargs::      :cast and :type default to the parser options (type falls back
#               to :double); :identifiers is removed and stored on the
#               result; :data is the container to fill (a new Hash when nil).
#               The remaining options go to parser.traverse together with the
#               block.
#
# Returns the populated data object.
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: nil, serializer: nil, **kwargs, &block)
  parser = if TSV::Parser === stream
             stream
           else
             TSV::Parser.new(stream, fix: fix, header_hash: header_hash, sep: sep)
           end

  cast = kwargs[:cast]
  cast = parser.options[:cast] if cast.nil?
  identifiers = kwargs.delete(:identifiers)

  # Both kwargs and the parser options end up holding the effective type.
  kwargs[:type] ||= (parser.options[:type] ||= :double)
  type = kwargs[:type]

  container = kwargs[:data]
  if container && container.respond_to?(:persistence_class)
    TSV.setup(container, type: type)
    container.extend TSVAdapter

    unless serializer
      numeric_serializers = {
        [:to_i, :single] => :integer,
        [:to_i, :list]   => :integer_array,
        [:to_i, :flat]   => :integer_array,
        [:to_f, :single] => :float,
        [:to_f, :list]   => :float_array,
        [:to_f, :flat]   => :float_array,
        [:to_f, :double] => :marshal,
        [:to_i, :double] => :marshal,
      }
      # Without a cast (or for any other cast/type pair) the type names the serializer.
      serializer = cast ? numeric_serializers.fetch([cast, type], type) : type
    end

    container.serializer = TSVAdapter::SERIALIZER_ALIAS[serializer] || serializer
  end

  kwargs[:data] = {} if kwargs[:data].nil?

  data = parser.traverse(**kwargs, &block)
  data.type = type
  data.cast = cast
  if data.filename.nil?
    data.filename = filename || parser.options[:filename]
  end
  if data.namespace.nil?
    data.namespace = namespace || parser.options[:namespace]
  end
  if data.identifiers.nil?
    data.identifiers = identifiers || parser.options[:identifiers]
  end
  data.unnamed = unnamed
  data.save_annotation_hash if data.respond_to?(:save_annotation_hash)
  data
end

.parse_header(stream, fix: true, header_hash: '#', sep: "\t") ⇒ Object



257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
# File 'lib/scout/tsv/parser.rb', line 257

# Read the header of a TSV stream.
#
# stream::      a Path or filename (opened and parsed recursively) or an
#               object answering to #gets.
# fix::         when truthy, lines are chomped and passed through
#               Misc.fixutf8; a Proc is called instead for the line following
#               the options line.
# header_hash:: prefix marking header lines ("#" by default); +true+ or ""
#               mean the first line is the fields line without any prefix.
# sep::         field separator (tab when nil); an options line may override it.
#
# Returns a NamedArray with the entries options, key_field, fields,
# first_line, preamble, all_fields and namespace, or {} when the stream has
# no lines. Raises "Closed stream" for an already closed IO. Any exception
# while reading is re-raised, after aborting the stream when it supports it.
def self.parse_header(stream, fix: true, header_hash: '#', sep: "\t")
  sep = "\t" if sep.nil?
  if (Path === stream) || ((String === stream) && Path.is_filename?(stream))
    Open.open(stream) do |f|
      return parse_header(f, fix: fix, header_hash: header_hash, sep: sep)
    end
  end

  if IO === stream && stream.closed?
    stream.join if stream.respond_to?(:join)
    raise "Closed stream" 
  end

  opts = {}
  preamble = []

  # Get line

  begin
    #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
    line = stream.gets
    return {} if line.nil?
    # NOTE(review): a Proc +fix+ is not called on this first line, only on the
    # line following the options line below — confirm this is intended.
    line = Misc.fixutf8 line.chomp if fix

    # Process options line
    # An options line has the form "<header_hash>: <options string>".
    if line and (String === header_hash && m = line.match(/^#{header_hash}: (.*)/))
      opts = IndiferentHash.string2hash m.captures.first.chomp
      line = stream.gets
      if line && fix
        if Proc === fix
          line = fix.call line
        else
          line = Misc.fixutf8 line.chomp if line && fix
        end
      end
    end

    # Determine separator
    sep = opts[:sep] if opts[:sep]

    # Process fields line
    # Every consecutive header line overwrites key_field/fields, so the last
    # header line provides the field names; earlier ones end up in the preamble.
    preamble << line if line
    while line && (TrueClass === header_hash || (String === header_hash && line.start_with?(header_hash)))
      fields = line.split(sep, -1)
      key_field = fields.shift
      key_field = key_field.sub(header_hash, '') if String === header_hash && ! header_hash.empty?

      line = (header_hash != "" ?  stream.gets : nil)
      # NOTE(review): applied regardless of the +fix+ flag.
      line = Misc.fixutf8 line.chomp if line
      preamble << line if line
      break if TrueClass === header_hash || header_hash == ""
    end

    # Drop the last two collected lines from the preamble.
    preamble = preamble[0..-3] * "\n"

    line ||= stream.gets

    first_line = line

    opts[:type] = opts[:type].to_sym if opts[:type]
    opts[:cast] = opts[:cast].to_sym if opts[:cast]

    all_fields = [key_field] + fields if key_field && fields
    namespace = opts[:namespace]
    NamedArray.setup([opts, key_field, fields, first_line, preamble, all_fields, namespace], %w(options key_field fields first_line preamble all_fields namespace))
  rescue Exception
    # Prefer the exception recorded on the stream, if any; otherwise abort the
    # stream with the current exception and re-raise it.
    raise stream.stream_exception if stream.respond_to?(:stream_exception) && stream.stream_exception
    stream.abort($!) if stream.respond_to?(:abort)
    raise $!
  end
end

.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil, select: nil, field_names: nil) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/scout/tsv/parser.rb', line 29

# Split one TSV line into its key and values.
#
# line::        the raw line (already stripped of its line terminator).
# type::        :list (array of strings), :single (first value), :flat
#               (all values split by +sep2+ and flattened) or :double (each
#               value split by +sep2+).
# key::         position of the key column, or its name (requires +field_names+).
# positions::   column positions to keep as values; nil keeps every column
#               except the key.
# sep::         column separator.
# sep2::        separator of multiple values inside a column.
# cast::        cast applied to the values through cast_value.
# select::      selection criteria checked through TSV.select; lines that do
#               not pass yield nil.
# field_names:: column names, used for key lookup by name and for +select+.
#
# Returns [key, values], or nil when the line is filtered out by +select+.
# Raises a RuntimeError when +key+ is a name that cannot be resolved.
def self.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil, select: nil, field_names: nil)
  items = line.split(sep, -1)

  return nil if select && ! TSV.select(items[0], items[1..-1], select, fields: field_names, type: type, sep: sep2)

  if String === key
    raise "Key by name, but no field names" if field_names.nil?
    # Keep the requested name around: +key+ becomes nil when it is not found,
    # and the error message must still report what was asked for.
    key_name = key
    key = field_names.index key_name
    raise "Key #{key_name} not found in field names #{Log.fingerprint field_names}" if key.nil?
  end

  if positions.nil? && key == 0
    key = items.shift
  elsif positions.nil?
    if type == :flat
      # For :flat the roles are swapped: the remaining columns provide the
      # keys and the first column the value.
      key = items[1..-1].collect{|e| e.split(sep2, -1) }.flatten
      items = items.slice(0,1)
    else
      key = items.delete_at(key)
    end
    key = key.split(sep2) if type == :double
  else 
    key, items = items[key], items.values_at(*positions)
    key = key.split(sep2) if type == :double || type == :flat
  end

  items = case type
          when :list
            items
          when :single
            items.first
          when :flat
            # values_at may yield nil for positions beyond the line; treat
            # them as empty, like the :double branch does.
            items.collect{|i| i.nil? ? [] : i.split(sep2, -1) }.flatten
          when :double
            items.collect{|i| i.nil? ? [] : i.split(sep2, -1) }
          end


  if cast
    items = cast_value(items, cast)
  end

  [key, items]
end

.parse_options(...) ⇒ Object



329
330
331
# File 'lib/scout/tsv/parser.rb', line 329

# Convenience accessor: forwards every argument to `parse_header` and returns
# only the `:options` entry of its result.
def self.parse_options(...)
  parse_header(...)[:options]
end

.parse_stream(stream, data: nil, source_type: nil, sep: "\t", type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, head: nil, **kwargs, &block) ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/scout/tsv/parser.rb', line 74

# Read a stream line by line, parse each line with `parse_line` and collect
# the results into `data`.
#
# stream      - IO-like object answering `gets` and `closed?`.
# data        - Container for the results; defaults to a new Hash. When it
#               answers `load_stream`, uses a String serializer and no
#               per-line processing was requested, the stream is handed to
#               `data.load_stream` directly.
# source_type - Type used to parse each line; defaults to `type`.
# sep         - Field separator.
# type        - Type of the stored values. :array and :line yield the raw
#               line to the block, :matrix yields the split line.
# merge       - For :double and :flat, how repeated keys are combined:
#               falsy overwrites, :concat appends in place, any other truthy
#               value stores a new merged array.
# one2one     - When the key is an Array, give each key the values at its own
#               position; also pads empty :double values with [nil].
# fix         - Proc applied to each line (false or :break stops, nil skips
#               the line); any other truthy value runs `Misc.fixutf8`.
# bar         - Progress bar, or true to create one.
# first_line  - Line already consumed from the stream, processed first.
# field_names - Passed to `parse_line` and to the block.
# head        - Maximum number of lines to process.
# kwargs      - Forwarded to `parse_line`.
# block       - Called with (key, values, field_names); its non-nil result is
#               stored instead of the parsed values.
#
# Returns `data`.
def self.parse_stream(stream, data: nil, source_type: nil, sep: "\t", type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, head: nil, **kwargs, &block)
  begin
    bar = "Parsing #{Log.fingerprint stream}" if TrueClass === bar
    bar = Log::ProgressBar.get_obj_bar(stream, bar) if bar
    bar.init if bar

    source_type = type if source_type.nil?

    # Name of the conversion between the parsed and the stored type
    type_swap_key = [source_type.to_s, type.to_s] * "_"

    same_type = source_type.to_s == type.to_s

    # Fast path: let the container load the raw stream itself
    if data && data.respond_to?(:load_stream) && 
        data.serializer.to_s.include?("String") &&
        same_type && 
        ! (head || kwargs[:cast] || kwargs[:positions] || (kwargs[:key] && kwargs[:key] != 0) || Proc === fix ) &&
        (sep.nil? || sep == "\t")


      Log.debug "Loading #{Log.fingerprint stream} directly into #{Log.fingerprint data}"
      if first_line
        # Put the already consumed line back in front of the stream
        full_stream = Open.open_pipe do |sin|
          sin.puts first_line
          Open.consume_stream(stream, false, sin)
        end
        data.load_stream(full_stream)
      else
        data.load_stream(stream)
      end

      return data
    end


    data = {} if data.nil?
    merge = false if type != :double && type != :flat
    line = first_line || stream.gets
    while line 
      break if head && head <= 0
      begin
        line.chomp!
        if Proc === fix
          line = fix.call line
          break if (FalseClass === line) || :break == line
          next if line.nil?
        elsif fix
          line = Misc.fixutf8(line)
        end
        bar.tick if bar

        if type == :array || type == :line
          block.call line
          next
        elsif type == :matrix
          parts = line.split(sep)
          block.call parts
          next
        end

        key, items = parse_line(line, type: source_type, field_names: field_names, sep: sep, **kwargs)

        next if key.nil?

        if Array === key
          keys = key
          if one2one
            key_items = keys.length.times.collect{|i| items.collect{|list| [list[i] || list[0]] } }
          else
            key_items = false
          end
        else
          keys = [key]
          key_items = false
        end

        keys.each_with_index do |key,i|
          if key_items
            these_items = key_items[i]
          else
            these_items = items
          end

          # Convert from the parsed type to the stored type
          these_items = 
            case type_swap_key
            when "single_single"
              these_items
            when "list_single"
              these_items.first
            when "flat_single"
              these_items.first
            when "double_single"
              these_items.first.first
            when "single_list"
              [these_items]
            when "list_list"
              these_items
            when "flat_list"
              these_items
            when "double_list"
              these_items.collect{|l| l.first }
            when "single_flat"
              [these_items]
            when "list_flat"
              these_items
            when "flat_flat"
              these_items
            when "double_flat"
              these_items.flatten
            when "single_double"
              [[these_items]]
            when "list_double"
              these_items.collect{|l| l.nil? ? [] : [l] }
            when "flat_double"
              [these_items]
            when "double_double"
              these_items
            end

          if block_given?
            res = block.call(key, these_items, field_names)
            # Nothing is stored when `data` was passed as false
            data[key] = res unless res.nil? || FalseClass === data
            next
          end

          if ! merge || ! data.include?(key)
            these_items = these_items.collect{|i| i.empty? ? [nil] : i } if type == :double && one2one
            data[key] = these_items
          elsif type == :double
            current = data[key]
            if merge == :concat
              these_items.each_with_index do |new,i|
                new = one2one ? [nil] : [] if new.empty?
                current[i].concat(new)
              end
            else
              merged = []
              these_items.each_with_index do |new,i|
                new = one2one ? [nil] : [] if new.empty?
                merged[i] = (current[i] || []) + new
              end
              data[key] = merged
            end
          elsif type == :flat
            current = data[key]
            if merge == :concat
              # A flat entry is a single array of values: append to it
              # directly. Indexing it with the key position `i` picked one
              # String element and raised a TypeError.
              current.concat these_items
            else
              data[key] = current + these_items
            end
          end
        end
      rescue Exception
        raise stream.stream_exception if stream.respond_to?(:stream_exception) && stream.stream_exception
        stream.abort($!) if stream.respond_to?(:abort)
        raise $!
      ensure
        # Runs after every iteration, including those left through `next`
        head = head - 1 if head
        if stream.closed?
          line = nil
        else
          line = stream.gets 
        end
      end
    end
    data
  ensure
    if stream.respond_to?(:stream_exception) && stream.stream_exception
      bar.remove(stream.stream_exception)
    else
      bar.remove
    end if bar

    if stream.respond_to?(:join)
      eof = begin
              stream.eof?
            rescue IOError
              true
            end
      stream.join if eof
    end
  end
end

.paste_streams(streams, type: nil, sort: nil, sort_cmd_args: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, one2one: true, field_prefix: nil) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# File 'lib/scout/tsv/stream.rb', line 2

# Paste several TSV streams side by side into a single dumper stream.
#
# Each input is unwrapped into a stream (Step and TSV::Dumper through
# `stream`, Path through `open`, TSV through `dumper_stream`), optionally
# passed through `Open.sort_stream`, and given to a TSV::Parser to read its
# header. A thread then repeatedly takes the smallest current key across the
# streams, gathers the values of every stream positioned on that key (the
# others contribute nils) and adds the resulting row to a TSV::Dumper.
#
# NOTE(review): the merge always advances from the smallest current key, so it
# presumably expects the inputs to be ordered by key (see `sort`) -- confirm.
#
# streams       - Array of stream-like inputs.
# type          - Type given to the output dumper; defaults to :double.
# sort          - When truthy, each stream goes through Open.sort_stream.
# sort_cmd_args - Passed to Open.sort_stream as `cmd_args`.
# sort_memory   - Passed to Open.sort_stream as `memory`.
# sep           - Field separator; defaults to a tab.
# preamble      - true joins the input preambles; a String is used instead of
#                 them, or appended to them when it starts with '+'.
# header        - Not used in this method body.
# same_fields   - When truthy, the output fields are those of the first stream
#                 and values are grouped by position instead of concatenated.
# fix_flat      - Not used in this method body.
# all_match     - When truthy, rows whose key is not the current key of every
#                 stream are skipped.
# one2one       - When false, nils are removed from array values and the
#                 dumper is created with `compact: true`.
# field_prefix  - Optional per-stream prefixes, joined to field names with ":".
#
# Returns the dumper stream set up as a ConcurrentStream with the pasting
# thread attached.
def self.paste_streams(streams, type: nil, sort: nil, sort_cmd_args: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, one2one: true, field_prefix: nil)
  sep = "\t" if sep.nil?

  # Unwrap every input into something that can be read line by line
  streams = streams.collect do |stream|
    case stream
    when(defined? Step and Step)
      stream.stream
    when Path
      stream.open
    when TSV::Dumper
      stream.stream
    when TSV
      stream.dumper_stream
    else
      stream
    end
  end.compact

  num_streams = streams.length

  streams = streams.collect do |stream|
    Open.sort_stream(stream, memory: sort_memory, cmd_args: sort_cmd_args)
  end if sort

  begin

    # Per-stream state, indexed by stream position
    lines         =[]
    fields        =[]
    sizes         =[]
    key_fields    =[]
    input_options =[]
    empty         =[]
    preambles     =[]
    parser_types  =[]

    type ||= :double

    # Read the header of each stream and remember its first data line
    streams = streams.collect do |stream|

      parser = TSV::Parser.new stream, sep: sep
      #parser.type = type

      sfields = parser.fields

      if field_prefix
        index = streams.index stream
        prefix = field_prefix[index]

        sfields = sfields.collect{|f|[prefix, f]* ":"}
      end

      first_line = parser.first_line
      first_line = nil if first_line == ""

      lines         << first_line
      key_fields    << parser.key_field
      fields        << sfields
      sizes         << sfields.length if sfields
      input_options << parser.options
      preambles     << parser.preamble      if preamble and not parser.preamble.empty?
      parser_types  << parser.type

      empty         << stream               if parser.first_line.nil? || parser.first_line.empty?

      stream
    end


    all_fields = fields.dup

    key_field = key_fields.compact.first

    if same_fields
      fields = fields.first
    else
      fields = fields.compact.flatten
    end

    options = input_options.first 
    # NOTE(review): `type` was already defaulted to :double above, so none of
    # the three `||=` below can assign -- confirm whether plain `=` was meant.
    type ||= options[:type]
    type ||= :list if type == :single
    type ||= :double if type == :flat

    preamble_txt = case preamble
                   when TrueClass
                     preambles * "\n"
                   when String
                     if preamble[0]== '+'
                       preambles * "\n" + "\n" + preamble[1..-1]
                     else
                       preamble
                     end
                   else
                     nil
                   end

    empty_pos = empty.collect{|stream| streams.index stream}

    # Split the first line of each stream into its key and its values
    keys =[]
    parts =[]
    lines.each_with_index do |line,i|
      if line.nil? || line.empty?
        keys[i]= nil
        parts[i]= nil
      else
        vs = line.split(sep, -1)
        key, *p = vs
        p = [p] if parser_types[i] == :flat
        keys[i]= key
        parts[i]= p
      end
      sizes[i] ||= parts[i].length unless parts[i].nil?
    end
    done_streams =[]

    fields = nil if fields && fields.empty?
    dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type, compact: !one2one
    dumper.init(preamble: preamble_txt || !!key_field)

    t = Thread.new do
      Thread.report_on_exception = false
      Thread.current["name"] = "Paste streams"

      last_min = nil
      # Keep going while at least one stream still has a pending line
      while lines.reject{|line| line.nil?}.any?
        min = keys.compact.sort.first
        break if min.nil?
        new_values =[]

        skip = all_match && keys.uniq !=[min]

        keys.each_with_index do |key,i|
          case key
          when min
            new_parts = parts[i]

            begin
              # Advance this stream; a read error is logged and treated as
              # the end of the stream
              line = lines[i]= begin
                                 streams[i].gets
                             rescue
                               Log.exception $!
                               nil
                             end
            if line.nil?
              keys[i]= nil
              parts[i]= nil
            else
              k, *p = line.chomp.split(sep, -1)
              p = p.collect{|e| e.nil? ? "" : e }
              p = [p] if parser_types[i] == :flat

              # Same key again in the same stream: join the values with "|"
              # and read the next line through the retry below
              if k == keys[i]
                new_parts = NamedArray.zip_fields([new_parts]).zip(p).collect{|p| [p.flatten * "|"] }
                raise TryAgain 
              end
              keys[i]= k
              parts[i]= p
            end

            new_values << new_parts
          rescue TryAgain
            keys[i]= nil
            parts[i]= nil
            retry
          end
        else
          # This stream is not on the current key: contribute nils
          p = [nil] * sizes[i]
          new_values << p
        end
      end

      next if skip

      if same_fields
        new_values_same = []
        new_values.each do |list|
          list.each_with_index do |l,i|
            new_values_same[i] ||= []
            new_values_same[i] << l
          end
        end
        new_values = new_values_same
      else
        new_values = new_values.inject([]){|acc,l| acc.concat l }
      end

      new_values = new_values.collect{|l| Array === l ? l.compact : l } unless one2one

      dumper.add min, new_values
    end

    dumper.close

    streams.each do |stream|
      stream.close if stream.respond_to?(:close)
      stream.join if stream.respond_to?(:join)
    end
    end
  rescue Aborted
    Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}"
    streams.each do |stream|
      stream.abort if stream.respond_to? :abort
    end
    raise $!
  rescue Exception
    Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}"
    streams.each do |stream|
      stream.abort if stream.respond_to? :abort
    end
    raise $!
  end

  # Wait until the thread has started and set its name
  Thread.pass until t["name"]

  ConcurrentStream.setup(dumper.stream, threads: [t])
end

.pos_index(tsv_file, pos_field = nil, key_field: :key, bar: nil, **kwargs) ⇒ Object



159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# File 'lib/scout/tsv/index.rb', line 159

# Build a FixWidthTable of positions from one field of a TSV, inside a
# `Persist.persist` block.
#
# tsv_file  - TSV object, or a source that `TSV.traverse` can read.
# pos_field - Field holding the positions; values are cast with `to_i`.
# key_field - Field used as key; defaults to :key.
# bar       - Progress bar option; true uses the log message as description.
# kwargs    - Remaining options. `:type` is extracted, keys pulled by the
#             `persist` and `data` prefixes configure persistence and the
#             optional `TSV.open`, and the rest is forwarded to `TSV.traverse`.
#
# Returns the result of `Persist.persist`; the block builds the table with
# `FixWidthTable.get`, fills it with `add_point` and calls `read` on it.
def self.pos_index(tsv_file, pos_field = nil, key_field: :key, bar: nil, **kwargs)
  kwargs = IndiferentHash.add_defaults kwargs, unnamed: true
  # The extracted values are not used here; the call is kept because it
  # operates on `kwargs`.
  IndiferentHash.process_options kwargs, :type

  prefix = "PositionIndex[#{pos_field}]"

  prefix += select_prefix_str(kwargs[:select])

  persist_options = IndiferentHash.pull_keys kwargs, :persist
  persist_options = IndiferentHash.add_defaults persist_options, :prefix => prefix, :type => :fwt, :persist => true

  data_options = IndiferentHash.pull_keys kwargs, :data

  Persist.persist(tsv_file, persist_options[:type], persist_options.merge(other_options: kwargs.merge(pos_field: pos_field, key_field: key_field))) do |filename|
    # `! TSV === tsv_file` parsed as `(!TSV) === tsv_file`, which is never
    # true for a real source, so the TSV was never opened. The options are
    # passed as keywords: splatting the Hash with `*` turned it into an
    # array of [key, value] pairs.
    tsv_file = TSV.open(tsv_file, **data_options) if data_options[:persist] && !(TSV === tsv_file)

    log_msg = "PositionIndex #{Log.fingerprint tsv_file} #{pos_field}"
    Log.low log_msg
    bar = log_msg if TrueClass === bar

    # The table has fixed-width keys, so track the longest one
    max_key_size = 0
    index_data = []
    TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :flat, cast: :to_i, bar: bar, **kwargs do |key, pos|
      key_size = key.length
      max_key_size = key_size if key_size > max_key_size

      if Array === pos
        pos.each do |p|
          index_data << [key, p]
        end
      else
        index_data << [key, pos]
      end
    end

    filename = :memory if filename.nil?
    index = FixWidthTable.get(filename, max_key_size, false)
    index.add_point index_data
    index.read
    index
  end
end

.process_stream(stream, header_hash: "#", &block) ⇒ Object



211
212
213
214
215
216
217
218
219
# File 'lib/scout/tsv/open.rb', line 211

# Copy the leading header lines of `stream` into a pipe and hand the rest of
# the work to the caller.
#
# Lines are copied while they start with `header_hash`. The block is then
# called with the pipe input and the first line that is not a header (nil
# when the stream ran out).
#
# Returns whatever `Open.open_pipe` returns.
def self.process_stream(stream, header_hash: "#", &block)
  Open.open_pipe do |sin|
    line = stream.gets
    while line && line.start_with?(header_hash)
      sin.puts line
      line = stream.gets
    end
    yield sin, line
  end
end

.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, bar: nil, **kwargs) ⇒ Object



115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/scout/tsv/index.rb', line 115

# Build a FixWidthTable of ranges from two fields of a TSV, inside a
# `Persist.persist` block.
#
# tsv_file    - TSV object, or a source that `TSV.traverse` can read.
# start_field - Field holding the start of each range.
# end_field   - Field holding the end of each range.
# key_field   - Field used as key; defaults to :key.
# bar         - Progress bar option; true uses the log message as description.
# kwargs      - Remaining options. `:type` and `:data_persist` are extracted,
#               keys pulled by the `persist` and `data` prefixes configure
#               persistence and the optional `TSV.open`, and the rest is
#               forwarded to `TSV.traverse`.
#
# Returns the result of `Persist.persist`; the block builds the table with
# `FixWidthTable.get`, fills it with `add_range` and calls `read` on it.
def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, bar: nil, **kwargs)
  kwargs = IndiferentHash.add_defaults kwargs, unnamed: true
  # The extracted values are not used here; the call is kept because it
  # operates on `kwargs`.
  IndiferentHash.process_options kwargs, :type, :data_persist

  prefix = "RangeIndex[#{start_field}-#{end_field}]"

  prefix += select_prefix_str(kwargs[:select])

  persist_options = IndiferentHash.pull_keys kwargs, :persist
  persist_options = IndiferentHash.add_defaults persist_options, :prefix => prefix, :type => :fwt, :persist => true

  data_options = IndiferentHash.pull_keys kwargs, :data

  Persist.persist(tsv_file, persist_options[:type], persist_options.merge(other_options: kwargs.merge(start_field: start_field, end_field: end_field, key_field: key_field))) do |filename|
    # `! TSV === tsv_file` parsed as `(!TSV) === tsv_file`, which is never
    # true for a real source, so the TSV was never opened. The options are
    # passed as keywords: splatting the Hash with `*` turned it into an
    # array of [key, value] pairs.
    tsv_file = TSV.open(tsv_file, **data_options) if data_options[:persist] && !(TSV === tsv_file)

    log_msg = "RangeIndex #{Log.fingerprint tsv_file} #{[start_field, end_field]*"-"}"
    Log.low log_msg
    bar = log_msg if TrueClass === bar

    # The table has fixed-width keys, so track the longest one
    max_key_size = 0
    index_data = []
    TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], bar: bar, unnamed: true, **kwargs do |key, values|
      key_size = key.length
      max_key_size = key_size if key_size > max_key_size

      start_pos, end_pos = values
      if Array === start_pos
        # Several ranges for the same key: pair starts with ends by position
        start_pos.zip(end_pos).each do |s,e|
          index_data << [key, [s.to_i, e.to_i]]
        end
      else
        index_data << [key, [start_pos.to_i, end_pos.to_i]]
      end
    end

    filename = :memory if filename.nil?
    index = FixWidthTable.get(filename, max_key_size, true)
    index.add_range index_data
    index.read
    index
  end
end

.select(key, values, method, fields: nil, field: nil, invert: false, type: nil, sep: nil, &block) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/scout/tsv/util/select.rb', line 2

# Decide whether an entry (key plus values) matches a selection criterion.
#
# key    - Key of the entry.
# values - Values of the entry.
# method - The criterion:
#          nil     - with a block, the block is called with (key, values).
#          Hash    - `{field => criterion}`; an `:invert` entry negates the
#                    result of the remaining criterion.
#          Array   - matches when it shares any element with the entry.
#          Regexp  - matches when any element matches it.
#          Symbol  - sent as a message to the first element.
#          Numeric - matches when the entry has more elements than it.
#          String  - with a block, names the field whose value is yielded
#                    ("key" yields the key); otherwise "<", "<=", ">" and
#                    ">=" prefixes compare numerically, and anything else is
#                    an equality test.
#          Proc    - called with each element.
# fields - Field names, used to resolve a field given by name.
# field  - Restrict the test to this field (:key for the key).
# invert - Negate the result.
# type   - When :double, values are split by `sep` before testing.
# sep    - Separator used for :double values.
#
# Returns a truthy value when the entry matches; nil when `method` is of an
# unsupported kind.
def self.select(key, values, method, fields: nil, field: nil, invert: false, type: nil, sep: nil, &block)
  # `fields` has to be forwarded as well, otherwise fields given by name
  # cannot be resolved in the inverted call.
  return ! select(key, values, method, fields: fields, field: field, invert: false, type: type, sep: sep, &block) if invert

  return yield(key, values) if method.nil? && block_given?

  if Hash === method
    if method.include?(:invert)
      method = method.dup
      invert = method.delete(:invert)
      return select(key, values, method, fields: fields, field: field, invert: invert, type: type, sep: sep, &block)
    end
    field = method.keys.first
    value = method[field]
    return select(key, values, value, fields: fields, field: field, invert: invert, type: type, sep: sep, &block)
  end

  if field
    field = NamedArray.identify_name(fields, field) if fields && String === field
    set = field == :key ? [key] : (type == :double ? values[field].split(sep) : values[field])
  else
    set = [key, (type == :double ? values.collect{|v| v.split(sep) } : values)]
  end

  # Non-destructive flatten: `set` may be the caller's own array
  set = Array === set ? set.flatten : [set]

  case method
  when Array
    (method & set).any?
  when Regexp
    set.select{|v| v =~ method }.any?
  when Symbol
    set.first.send(method)
  when Numeric
    set.size > method
  when String
    if block_given?
      # With a block the String names the field to test. Use its position
      # when field names are known; otherwise index `values` by name.
      pos = fields.index(method) if fields
      value = if pos
                values[pos]
              elsif method == "key"
                key
              else
                values[method]
              end
      case block.arity
      when 1
        yield(value)
      when 2
        yield(key, value)
      end
    elsif m = method.match(/^([<>]=?)(.*)/)
      set.select{|v| v.to_f.send(m[1], m[2].to_f) }.any?
    else
      set.select{|v| v == method }.any?
    end
  when Proc
    set.select{|v| method.call(v) }.any?
  end
end

.select_prefix_str(select) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/scout/tsv/index.rb', line 6

# Render a `select` criterion as a short tag for persistence prefixes.
#
# An Array of Arrays is rendered as comma-separated "a=b" pairs, any other
# non-empty Array as its elements joined by "=", and a non-empty Hash as
# comma-separated "key=value" pairs. Anything else, or a failure while
# rendering (which is logged as a warning), produces no tag.
#
# Returns "[select:...]" or an empty String.
def self.select_prefix_str(select)
  str = begin
          if select.nil?
            nil
          elsif Array === select
            head = select.first
            if head.nil?
              nil
            elsif Array === head
              select.collect { |pair| pair * "=" } * ","
            else
              select.collect { |item| item.to_s } * "="
            end
          elsif Hash === select
            select.empty? ? nil : select.collect { |key, value| [key.to_s, value.to_s] * "=" } * ","
          end
        rescue
          Log.warn "Error in select_prefix_str: #{Log.fingerprint(select)}: #{$!.message}"
          nil
        end

  str.nil? ? "" : "[select:#{str}]"
end

.setup(obj, *rest, &block) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/scout/tsv.rb', line 36

# Set up `obj` as a TSV through `original_setup`.
#
# A single String argument is first turned into options with
# `TSV.str2options`. An Array `obj` is converted with
# `IndiferentHash.array2hash`, using an empty Array as default value unless
# the options declare the :single type (or an unrecognized one), in which
# case the default is nil.
#
# Returns the object that was set up (the converted Hash for Array input).
def setup(obj, *rest, &block)
  from_string = rest.length == 1 && String === rest.first
  options = TSV.str2options(rest.first) if from_string

  if Array === obj
    options = rest.first if !from_string && Hash === rest.first
    options ||= {}
    default_value = [:double, :flat, :list, nil].include?(options[:type]) ? [] : nil
    obj = IndiferentHash.array2hash(obj, default_value)
  end

  if from_string
    original_setup(obj, options, &block)
  else
    original_setup(obj, *rest, &block)
  end

  obj.save_annotation_hash if obj.respond_to?(:save_annotation_hash)

  obj
end

.str2options(str) ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
# File 'lib/scout/tsv.rb', line 21

# Turn a compact TSV description such as "Key~Field1,Field2#:type=:double"
# into an options Hash.
#
# The text before "#" holds the key field and, after "~", the comma-separated
# field names. The text after "#" is parsed with `IndiferentHash.string2hash`;
# when it is a single word it is taken as the value of `:type`.
#
# Returns a Hash with :key_field and :fields merged with the parsed options.
def self.str2options(str)
  field_part, _sep, option_part = str.partition("#")
  key, field_list = field_part.split("~")

  fields = field_list.nil? ? [] : field_list.split(/,\s*/)

  option_part = ":type=#{option_part}" if option_part =~ /^:?\w+$/
  extra_options = option_part.nil? ? {} : IndiferentHash.string2hash(option_part)

  { :key_field => key, :fields => fields }.merge(extra_options)
end

.str_setup(option_str, obj) ⇒ Object



70
71
72
73
# File 'lib/scout/tsv.rb', line 70

# Set up `obj` with the options described by `option_str`, as parsed by
# `TSV.str2options`.
def self.str_setup(option_str, obj)
  setup(obj, **TSV.str2options(option_str))
end

.translate(tsv, field, format, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_index: true) ⇒ Object



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/scout/tsv/change_id/translate.rb', line 116

# Translate the identifiers in one field of a TSV into another format.
#
# tsv           - Source TSV.
# field         - Name of the field (or key field) to translate.
# format        - Name of the target identifier format.
# identifiers   - Identifier files; defaults to `tsv.identifier_files`.
# one2one       - Passed to the transformer traversal and to `tsv`.
# merge         - Passed to `transformer.tsv`.
# stream        - When truthy, return the transformer instead of a TSV.
# keep          - Not used in this method body.
# persist_index - Passed to `translation_index` as `persist`.
#
# Returns the TSV::Transformer when `stream` is truthy, otherwise the result
# of `transformer.tsv`.
def self.translate(tsv, field, format, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_index: true)
  identifiers ||= tsv.identifier_files
  index_sources = [tsv, identifiers].flatten.compact
  index = translation_index(index_sources, field, format, persist: persist_index)

  key_field, *fields = TSV.all_fields(tsv)
  translating_key = field == key_field
  new_key_field = translating_key ? format : key_field
  new_fields = translating_key ? fields : fields.collect { |name| name == field ? format : name }

  # :key when the key itself is translated, otherwise the column position
  field_pos = new_key_field == key_field ? new_fields.index(format) : :key

  transformer = TSV::Transformer.new tsv
  transformer.key_field = new_key_field
  transformer.fields = new_fields
  transformer.traverse one2one: one2one, unnamed: true do |k, v|
    if field_pos == :key
      [index[k], v]
    else
      translated = v.dup
      current = translated[field_pos]
      translated[field_pos] = Array === current ? index.values_at(*current).compact : index[current]
      [k, translated]
    end
  end

  return transformer if stream

  transformer.tsv(merge: merge, one2one: one2one)
end

.translation_index(files, source, target, persist_options = {}) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/scout/tsv/change_id/translate.rb', line 49

# Build (or load from persistence) an index translating identifiers from
# `source` to `target` using a set of identifier files.
#
# files           - One file or an Array of files: TSV objects, Paths or
#                   Strings (Strings are set up as Paths).
# source          - Source field name; nil indexes every field.
# target          - Target field name.
# persist_options - Options for `Persist.persist`; defaults enable
#                   persistence with the "Translation index" prefix.
#
# Returns nil when `source` equals `target`; otherwise the result of
# `Persist.persist`, whose block yields the `target` column as a single-value
# TSV (`slice([target]).to_single`).
def self.translation_index(files, source, target, persist_options = {})
  return nil if source == target
  persist_options = IndiferentHash.add_defaults persist_options.dup, :persist => true, :prefix => "Translation index"

  file_fields = {}

  files = [files] unless Array === files

  # Collect the fields of every usable file; files that cannot be produced
  # are skipped with a warning
  files.each do |file|
    #next if Path === file && ! Open.exist?(file)
    Path.setup file if String === file and not Path === file
    begin
      file = file.produce if Path === file
      raise "Could no produce file" if FalseClass === file
    rescue
      Log.warn $!.message
      next
    end
    file = file.find if Path === file
    file_fields[file] = all_fields(file)
  end

  # NOTE(review): a failure triggers one more identical attempt and, if that
  # fails too, the first exception is raised -- confirm why a retry helps.
  begin
    path = translation_path(file_fields, source, target)
  rescue
    exception = $!
    begin
      path = translation_path(file_fields, source, target)
    rescue
      raise exception
    end
  end

  name = [source || "all", target] * "->" + " (#{files.length} files - #{Misc.digest(files)})"
  # With more than one file in the path, the first file is indexed towards a
  # field it shares with the second one instead of towards `target`
  second_target = if path.length == 1
                    target
                  else
                    file1, file2 = path.values_at 0, 1
                    pos = NamedArray.identify_name(TSV.all_fields(file1), TSV.all_fields(file2))
                    TSV.all_fields(file1)[pos.compact.first]
                  end
  Persist.persist(name, "HDB", persist_options) do 
    # Start from the first file and attach each following file in turn
    index = path.inject(nil) do |acc,file|
      if acc.nil?
        if source.nil?
          if TSV === file
            acc = file.index target: second_target
          else
            acc = TSV.index(file, target: second_target)
          end
        else
          if TSV === file
            acc = (file.key_field == source || source.nil?) ? file.annotate(file.dup) : file.reorder(source)
          else
            acc = TSV.open(file, key_field: source)
          end
        end
      else
        acc = acc.attach file, insitu: false
      end

      acc
    end
    index.slice([target]).to_single
  end
end

.translation_path(file_fields, source, target) ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/scout/tsv/change_id/translate.rb', line 18

# Find the files needed to go from the `source` field to the `target` field.
#
# file_fields - Hash mapping each file to its list of fields.
# source      - Source field name; nil treats every file as a source.
# target      - Target field name.
#
# Returns an Array with one file (it holds both fields), two files (they
# share a field) or three files (a middle file shares fields with both ends).
# Raises RuntimeError when no such path exists.
def self.translation_path(file_fields, source, target)
  files_with = lambda do |name|
    file_fields.select { |_file, fields| identify_field_in_obj(fields, name) }.keys
  end

  target_files = files_with.call(target)
  source_files = source.nil? ? file_fields.keys : files_with.call(source)

  if source
    one_step = target_files & source_files
    return [one_step.first] if one_step.any?
  end

  source_fields = file_fields.values_at(*source_files).flatten
  target_fields = file_fields.values_at(*target_files).flatten

  # First file of the list that holds any of the given fields
  first_sharing = lambda do |files, names|
    files.find { |file| (file_fields[file] & names).any? }
  end

  common_fields = source_fields & target_fields
  if common_fields.any?
    return [first_sharing.call(source_files, common_fields), first_sharing.call(target_files, common_fields)]
  end

  middle_file, middle_fields = file_fields.find do |_file, fields|
    (fields & source_fields).any? && (fields & target_fields).any?
  end

  unless middle_file
    raise "Could not traverse identifier path from #{Log.fingerprint source} to #{Log.fingerprint target}. #{file_fields.empty? ? "No identifier files" : Log.fingerprint(file_fields)}"
  end

  [first_sharing.call(source_files, middle_fields), middle_file, first_sharing.call(target_files, middle_fields)]
end

.traverse(*args, **kwargs, &block) ⇒ Object



207
208
209
# File 'lib/scout/tsv/open.rb', line 207

# Delegates to `Open.traverse`, forwarding positional arguments, keyword
# arguments and the block unchanged.
def self.traverse(*args, **kwargs, &block)
  Open.traverse(*args, **kwargs, &block)
end

.unzip(source, field, target: nil, sep: ":", delete: true, type: :list, merge: false, one2one: true, bar: nil) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/scout/tsv/util/unzip.rb', line 3

# Move the values of one field into the key, producing one entry per
# (key, field value) combination.
#
# source  - TSV-like object, or a String that is wrapped in a TSV::Parser.
# field   - Field whose values become part of the new keys.
# target  - :stream writes to a TSV::Dumper, nil to a new TSV; any other
#           object is used as the target itself.
# sep     - Separator between the old key and the field value in the new key
#           and in the new key field name.
# delete  - When truthy, the field is removed from the resulting values.
# type    - Type of the target; forced to :double when `merge` is truthy.
# merge   - Passed to `transformer.tsv`.
# one2one - Passed to the traversal; for :double sources it selects whether
#           values are zipped per position or kept whole for every new key.
# bar     - Progress bar option; true uses a default description.
#
# Returns the TSV::Transformer when `target` is :stream, otherwise the result
# of `transformer.tsv`.
def self.unzip(source, field, target: nil, sep: ":", delete: true, type: :list, merge: false, one2one: true, bar: nil)
  source = TSV::Parser.new source if String === source

  field_pos = source.identify_field(field)
  new_fields = source.fields.dup
  field_name = new_fields[field_pos]
  new_fields.delete_at(field_pos) if delete
  new_key_field = [source.key_field, field_name] * sep
  type = :double if merge

  stream = target == :stream

  target = case target
           when :stream
             TSV::Dumper.new(source.options.merge(sep: "\t"))
           when nil
             TSV.setup({})
           else
             target
           end
             
  target.fields = new_fields
  target.key_field = new_key_field
  target.type = type

  transformer = TSV::Transformer.new source, target, unnamed: true

  bar = "Unzip #{new_key_field}" if TrueClass === bar

  transformer.traverse unnamed: true, one2one: one2one, bar: bar do |k,v|
    if source.type == :double
      if one2one
        # One new entry per position across the zipped value lists
        res = NamedArray.zip_fields(v).collect do |_v|
          field_value = _v[field_pos]

          if delete
            new_values = _v.dup
            new_values.delete_at field_pos
          else
            new_values = _v
          end

          new_key = [k,field_value] * sep
          new_values = new_values.collect{|e| [e] } if transformer.type == :double
          [new_key, new_values]
        end
      else
        # Every new key receives the complete value lists
        all_values = v.collect{|e| e.dup }
        all_values.delete_at field_pos if delete
        res = NamedArray.zip_fields(v).collect do |_v|
          field_value = _v[field_pos]

          new_key = [k,field_value] * sep
          # NOTE(review): new_values stays nil unless the transformer type is
          # :double -- confirm this is intended for other types.
          new_values = all_values if transformer.type == :double
          [new_key, new_values]
        end
      end
      
      MultipleResult.setup(res)
    else
      field_value = v[field_pos]

      if delete
        new_values = v.dup
        new_values.delete_at field_pos
      else
        new_values = v
      end

      new_key = [k,field_value] * sep

      new_values = new_values.collect{|e| [e] } if transformer.type == :double

      [new_key, new_values]
    end
  end

  stream ? transformer : transformer.tsv(merge: merge)
end

Instance Method Details

#[](key, *rest) ⇒ Object



57
58
59
60
61
# File 'lib/scout/tsv/util.rb', line 57

# Look up `key` through the parent implementation and, for Array values,
# attach the field names with `NamedArray.setup`. Nothing is attached when
# the TSV is unnamed or its type is :flat.
def [](key, *rest)
  value = super(key, *rest)
  return value if @unnamed || @type == :flat || !(Array === value)

  NamedArray.setup(value, @fields, key)
  value
end

#add_field(name = nil) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/scout/tsv/util/process.rb', line 46

# Append a new value to every entry, computed by the block from
# (key, current values), and optionally register the new field name.
#
# For :double TSVs a non-Array result is wrapped in an Array (nil becomes an
# empty Array). For :single TSVs the result replaces the current value.
# Entries without values are padded with one nil per existing field.
#
# name - Name of the new field; appended to `fields` when both are present.
#
# Returns self.
def add_field(name = nil)
  keys.each do |key|
    current = self[key]
    addition = yield(key, current)
    addition = [addition].compact if type == :double && !(Array === addition)

    self[key] =
      if type == :single
        addition
      elsif current.nil?
        padding = (fields.nil? || fields.empty?) ? [] : [nil] * fields.length
        padding + [addition]
      elsif Array === current
        current + [addition]
      else
        current << addition
      end
  end

  self.fields = self.fields + [name] unless fields.nil? || name.nil?

  self
end

#all_fields ⇒ Object



150
151
152
153
# File 'lib/scout/tsv/util.rb', line 150

# List the key field followed by the value fields; empty when the TSV has no
# field names.
def all_fields
  @fields.nil? ? [] : [@key_field] + @fields
end

#attach(*args, **kwargs) ⇒ Object



228
229
230
# File 'lib/scout/tsv/attach.rb', line 228

# Instance-level shortcut for TSV.attach: this object is passed as the first
# argument and all positional and keyword arguments are forwarded unchanged.
# Returns whatever TSV.attach returns.
def attach(*args, **kwargs)
  TSV.attach(self, *args, **kwargs)
end

#change_id(*args, **kwargs) ⇒ Object



43
44
45
# File 'lib/scout/tsv/change_id.rb', line 43

# Instance-level shortcut for TSV.change_id: this object is passed as the
# first argument and all other arguments are forwarded unchanged.
def change_id(*args, **kwargs)
  TSV.change_id(self, *args, **kwargs)
end

#change_key(*args, **kwargs) ⇒ Object



29
30
31
# File 'lib/scout/tsv/change_id.rb', line 29

# Instance-level shortcut for TSV.change_key: this object is passed as the
# first argument and all other arguments are forwarded unchanged.
def change_key(*args, **kwargs)
  TSV.change_key(self, *args, **kwargs)
end

#chunked_values_at(keys, max = 5000) ⇒ Object



272
273
274
275
276
277
278
# File 'lib/scout/tsv/util/select.rb', line 272

# Like values_at, but looks the keys up in chunks produced by
# Misc.ordered_divide(keys, max) and concatenates the partial results.
#
# When the first chunk's result responds to :annotate, it is asked to
# annotate the (still empty) accumulator before values are appended.
#
# Returns the accumulated Array of values.
def chunked_values_at(keys, max = 5000)
  collected = []
  Misc.ordered_divide(keys, max).each do |chunk|
    batch = self.values_at(*chunk)
    batch.annotate(collected) if batch.respond_to?(:annotate) && collected.empty?
    collected.concat(batch)
  end
  collected
end

#collapse_stream(*args, **kwargs, &block) ⇒ Object



229
230
231
# File 'lib/scout/tsv/open.rb', line 229

# Feeds this TSV's dumper_stream into TSV.collapse_stream, forwarding all
# arguments and the block. Returns whatever TSV.collapse_stream returns.
def collapse_stream(*args, **kwargs, &block)
  TSV.collapse_stream(self.dumper_stream, *args, **kwargs, &block)
end

#collect(*args, &block) ⇒ Object



98
99
100
101
102
103
104
105
106
107
108
# File 'lib/scout/tsv/util.rb', line 98

# With a block: iterates through this object's own #each and returns an Array
# with the block's result for every (key, value) pair.
# Without a block: defers to the parent implementation.
def collect(*args, &block)
  return super(*args) unless block_given?

  gathered = []
  each { |k, v| gathered << yield(k, v) }
  gathered
end

#column(field, **kwargs) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
# File 'lib/scout/tsv/util/reorder.rb', line 47

# Extracts a single field through #slice, choosing the result type from the
# current one: :double and :flat TSVs give :flat, anything else gives :single.
# Any :type passed in kwargs is overwritten.
def column(field, **kwargs)
  kwargs[:type] = [:double, :flat].include?(type) ? :flat : :single
  slice(field, **kwargs)
end

#digest_str ⇒ Object



171
172
173
# File 'lib/scout/tsv/util.rb', line 171

# Builds a digest string from the Log.fingerprint of all fields, of the keys
# and of the values, in that order, separated by ";" and wrapped in "TSV:{}".
def digest_str
  fields_print = Log.fingerprint(self.all_fields || [])
  keys_print = Log.fingerprint(self.keys)
  values_print = Log.fingerprint(self.values)
  "TSV:{#{fields_print};#{keys_print};#{values_print}}"
end

#dumper_stream(options = {}) ⇒ Object Also known as: stream



149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# File 'lib/scout/tsv/dumper.rb', line 149

# Dumps this TSV through a TSV::Dumper.
#
# Options read here (extracted with IndiferentHash.process_options):
#   :preamble - forwarded to dumper.init (default true)
#   :unmerge  - default false, and forced to false unless @type is :double.
#               When truthy every entry is emitted as one dumper row per
#               zipped position; with :expand, single-element value lists are
#               first repeated to the length of the longest list.
#   :keys     - optional list of keys to dump, in that order, instead of
#               iterating over every entry
#   :stream   - optional target stream
# The options hash is also merged over annotation_hash to configure the dumper.
#
# Without :stream the dump runs in a new Thread and the dumper's own stream is
# returned, set up as a ConcurrentStream tied to that thread. With :stream the
# dump runs synchronously and the given stream is returned. In both modes an
# exception during dumping is handed to dumper.abort.
def dumper_stream(options = {})
  preamble, unmerge, keys, stream = IndiferentHash.process_options options, 
    :preamble, :unmerge, :keys, :stream,
    :preamble => true, :unmerge => false
  unmerge = false unless @type === :double
  dumper = TSV::Dumper.new self.annotation_hash.merge(options)

  # Writes one entry to the dumper, splitting it into several rows when unmerging
  dump_entry = Proc.new do |k,value_list|
    if unmerge
      max = value_list.collect{|v| v.length}.max

      if unmerge == :expand and max > 1
        value_list = value_list.collect do |values|
          if values.length == 1
            [values.first] * max
          else
            values
          end
        end
      end

      NamedArray.zip_fields(value_list).each do |values|
        dumper.add k, values
      end
    else
      dumper.add k, value_list
    end
  end

  self.with_unnamed do
    if stream.nil?
      t = Thread.new do 
        begin
          Thread.current.report_on_exception = true
          Thread.current["name"] = "Dumper thread"
          dumper.init(preamble: preamble)

          if keys
            keys.each do |k|
              dump_entry.call k, self[k]
            end
          else
            self.each &dump_entry
          end

          dumper.close
        rescue
          dumper.abort($!)
        end
      end
      # Wait until the thread has started, i.e. it has set its "name" entry
      Thread.pass until t["name"]
      stream = dumper.stream
      ConcurrentStream.setup(stream, :threads => [t])
      stream
    else
      dumper.set_stream stream
      begin
        dumper.init(preamble: preamble)
        if keys
          keys.each do |k|
            dump_entry.call k, self[k]
          end
        else
          self.each &dump_entry
        end

        dumper.close
      rescue
        dumper.abort($!)
      end
      stream
    end
  end
end

#each(*args, &block) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
# File 'lib/scout/tsv/util.rb', line 86

# Iterates over the entries through the parent implementation.
#
# With a block, Array values are decorated with NamedArray.setup (using
# @fields) before being handed to the block, unless naming is disabled.
# Naming counts as disabled when @unnamed is nil or truthy, or when @type is
# :flat. Without a block the call is passed straight to the parent.
def each(*args, &block)
  return super(*args) unless block_given?

  skip_naming = @unnamed.nil? || @unnamed
  super(*args) do |k, v|
    NamedArray.setup(v, @fields) if !skip_naming && @type != :flat && Array === v
    block.call(k, v)
  end
end

#filter(filter_dir = nil) ⇒ Object



285
286
287
288
289
290
# File 'lib/scout/tsv/util/filter.rb', line 285

# Turns this object into a filtered TSV: extends it with the Filtered module,
# stores +filter_dir+ and starts with an empty list of filters.
#
# Returns self.
def filter(filter_dir = nil)
  self.extend Filtered
  self.filter_dir = filter_dir
  self.filters = []
  self
end

#fingerprint ⇒ Object



167
168
169
# File 'lib/scout/tsv/util.rb', line 167

# Builds a short description from the Log.fingerprint of all fields and of
# the keys, separated by ";" and wrapped in "TSV:{}". Values are not included.
def fingerprint
  fields_print = Log.fingerprint(self.all_fields || [])
  keys_print = Log.fingerprint(self.keys)
  "TSV:{#{fields_print};#{keys_print}}"
end

#head(max = 10) ⇒ Object



180
181
182
183
184
185
186
187
188
189
190
# File 'lib/scout/tsv/transformer.rb', line 180

# Returns a new annotated copy holding only the first +max+ entries, in
# traversal order. Entries are moved across with a Transformer and the
# traversal is abandoned as soon as the limit is exceeded.
def head(max=10)
  result = self.annotate({})
  seen = 0
  Transformer.new(self, result).traverse do |key, value|
    seen += 1
    break if seen > max
    [key, value]
  end
  result
end

#identifier_files ⇒ Object



232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# File 'lib/scout/tsv/attach.rb', line 232

# Lists the identifier sources available for this TSV.
#
# Returns an Array chosen by the first matching case:
# - +identifiers+ is a TSV: that TSV alone
# - +identifiers+ is an Array: returned untouched when it is empty or its
#   first element is a TSV; otherwise every element that is not already a
#   Path is converted with Path.setup
# - +identifiers+ is any other truthy value: that value, as a Path
# - +filename+ is a Path: the entries of filename.dirname.identifiers that exist
# - +filename+ is otherwise set: the identifiers of its directory, taken after
#   turning a copy of it into a Path (no existence check in this branch)
# - none of the above: an empty Array
def identifier_files
  case
  when (identifiers and TSV === identifiers)
    [identifiers]
  when (identifiers and Array === identifiers)
    case
    when (TSV === identifiers.first or identifiers.empty?)
      identifiers
    else
      identifiers.collect{|f| Path === f ? f : Path.setup(f)}
    end
  when identifiers
    [ Path === identifiers ? identifiers : Path.setup(identifiers) ]
  when Path === filename
    path_files = filename.dirname.identifiers
    [path_files].flatten.compact.select{|f| f.exists?}
  when filename
    [Path.setup(filename.dup).dirname.identifiers]
  else
    []
  end
end

#identify_field(name, strict: nil) ⇒ Object



53
54
55
# File 'lib/scout/tsv/util.rb', line 53

# Resolves +name+ against this TSV's key field and fields by delegating to
# TSV.identify_field, forwarding the +strict+ option.
def identify_field(name, strict: nil)
  TSV.identify_field(@key_field, @fields, name, strict: strict)
end

#index(*args, **kwargs, &block) ⇒ Object



111
112
113
# File 'lib/scout/tsv/index.rb', line 111

# Instance-level shortcut for TSV.index: this object is passed as the first
# argument and all other arguments, including the block, are forwarded.
def index(*args, **kwargs, &block)
   TSV.index(self, *args, **kwargs, &block)
end

#inspect ⇒ Object



175
176
177
# File 'lib/scout/tsv/util.rb', line 175

# Uses #fingerprint as the inspect representation.
def inspect
  fingerprint
end

#melt_columns(value_field, column_field) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
# File 'lib/scout/tsv/util/melt.rb', line 2

# Melts the table into long format: one row per (entry, field) pair.
#
# The result is a :list TSV keyed by "ID" with the fields
# [key_field, value_field, column_field]. Each row is stored under
# "<key>:<position>" and holds [key, value, field name].
#
# value_field  - name for the field holding the values
# column_field - name for the field holding the original field names
def melt_columns(value_field, column_field)
  melted = TSV.setup({}, :key_field => "ID", :fields => [key_field, value_field, column_field], :type => :list, :cast => cast)
  each do |key, row|
    row.zip(fields).each_with_index do |(value, field_name), position|
      melted["#{key}:#{position}"] = [key, value, field_name]
    end
  end
  melted
end

#merge(other) ⇒ Object



179
180
181
# File 'lib/scout/tsv/util.rb', line 179

# Merges +other+ using the parent implementation and passes the result to
# annotate, so the merged object is annotated from this one.
def merge(other)
  self.annotate(super(other))
end

#merge_zip(other) ⇒ Object



183
184
185
186
187
# File 'lib/scout/tsv/util.rb', line 183

# Adds every (key, value) pair of +other+ to this TSV through zip_new.
# The return value is that of other.each.
def merge_zip(other)
  other.each do |k,v|
    self.zip_new k, v
  end
end

#options ⇒ Object



63
64
65
# File 'lib/scout/tsv/util.rb', line 63

# Alias-style accessor: returns annotation_hash.
def options
  annotation_hash
end

#page(pnum, psize, field = nil, just_keys = false, reverse = false, &block) ⇒ Object



156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/scout/tsv/util/sort.rb', line 156

# Returns one page of entries after sorting with #sort_by.
#
# pnum      - 1-based page number
# psize     - number of entries per page
# field     - field to sort by; nil and the string "key" both mean :key
# just_keys - when true return only the page's keys, otherwise the result of
#             select(:key => page_keys)
# reverse   - when true the sorted keys are reversed before paging
# block     - forwarded to sort_by
def page(pnum, psize, field = nil, just_keys = false, reverse = false, &block)
  window = (psize * (pnum - 1))..(psize * pnum - 1)
  sort_field = field == "key" ? :key : field

  ordered = sort_by(sort_field || :key, true, &block)
  ordered.reverse! if reverse

  page_keys = ordered[window]
  just_keys ? page_keys : select(:key => page_keys)
end

#pos_index(*args, **kwargs, &block) ⇒ Object



206
207
208
# File 'lib/scout/tsv/index.rb', line 206

# Instance-level shortcut for TSV.pos_index: this object is passed as the
# first argument and all other arguments, including the block, are forwarded.
def pos_index(*args, **kwargs, &block)
  TSV.pos_index(self, *args, **kwargs, &block)
end

#prepare_entity(...) ⇒ Object



4
5
6
# File 'lib/scout/tsv/util/sort.rb', line 4

# Forwards every argument untouched to Entity.prepare_entity.
def prepare_entity(...)
  Entity.prepare_entity(...)
end

#process(field, &block) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/scout/tsv/util/process.rb', line 2

# Rewrites the values of one field in place using the given block.
#
# field - field to process; resolved with identify_field
# block - called with (field_values), (field_values, key) or
#         (field_values, key, all_values) depending on its arity; its result
#         becomes the new value of the field
#
# For :single and :flat TSVs the whole entry is the field value and is
# replaced by the block result. For other types entries that are nil are
# skipped, and only the position of +field+ is updated.
#
# Raises RuntimeError when the block arity is not 1, 2 or 3.
# Returns self.
def process(field, &block)
  field_pos = identify_field field

  through do |key, values|
    case
    when type == :single
      field_values = values
    when type == :flat
      field_values = values
    else
      next if values.nil?
      field_values = values[field_pos]
    end

    new_values = case
                 when block.arity == 1
                   yield(field_values)
                 when block.arity == 2
                   yield(field_values, key)
                 when block.arity == 3
                   yield(field_values, key, values)
                 else
                   raise "Unexpected arity in block, must be 1, 2 or 3: #{block.arity}"
                 end

    case
    when type == :single
      self[key] = new_values
    when type == :flat
      self[key] = new_values
    else
      current = values[field_pos]
      # Reuse the existing String/Array object when it is safe to do so.
      # The NamedArray test must be parenthesised: `! NamedArray === values`
      # parses as `(!NamedArray) === values`, which is never true.
      same_kind = (String === current && String === new_values) ||
        (Array === current && Array === new_values)
      if !current.frozen? && !(NamedArray === values) && same_kind
        current.replace new_values
      else
        values[field_pos] = new_values
      end
      self[key] = values
    end
  end

  self
end

#range_index(*args, **kwargs, &block) ⇒ Object



202
203
204
# File 'lib/scout/tsv/index.rb', line 202

# Instance-level shortcut for TSV.range_index: this object is passed as the
# first argument and all other arguments, including the block, are forwarded.
def range_index(*args, **kwargs, &block)
  TSV.range_index(self, *args, **kwargs, &block)
end

#remove_duplicates(pivot = 0) ⇒ Object



76
77
78
79
80
81
82
# File 'lib/scout/tsv/util/process.rb', line 76

# Returns a new annotated copy in which, for every entry, repeated value
# tuples are removed: the field lists are zipped into tuples with
# NamedArray.zip_fields, made unique, and zipped back into field lists.
#
# pivot - accepted but not used by this implementation
def remove_duplicates(pivot = 0)
  deduplicated = self.annotate({})
  self.through do |key, field_lists|
    unique_tuples = NamedArray.zip_fields(field_lists).uniq
    deduplicated[key] = NamedArray.zip_fields(unique_tuples)
  end
  deduplicated
end

#reorder(key_field = nil, fields = nil, merge: true, one2one: true, data: nil, unnamed: true, **kwargs) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/scout/tsv/util/reorder.rb', line 4

# Builds a TSV with a different key field and/or selection of fields.
#
# key_field - field to use as the new key (passed to traverse)
# fields    - fields to keep (passed to traverse)
# merge     - for :double results, how to combine entries that end up under
#             the same key: :concat appends to the stored lists in place, any
#             other truthy value stores freshly built lists, false overwrites
# one2one   - forwarded to traverse through kwargs
# data      - object to fill; defaults to an empty annotated copy
# unnamed   - passed to with_unnamed for the duration of the traversal
# kwargs    - remaining options for traverse; :type also sets the result type
#
# Returns the filled object with key_field and fields set from the names
# reported by traverse.
def reorder(key_field = nil, fields = nil, merge: true, one2one: true, data: nil, unnamed: true, **kwargs)
  res = data || self.annotate({})
  res.type = kwargs[:type] if kwargs.include?(:type)
  kwargs[:one2one] = one2one
  key_field_name, field_names = with_unnamed unnamed do
    traverse key_field, fields, **kwargs do |k,v|
      if res.type == :double && merge && res.include?(k)
        current = res[k]
        if merge == :concat
          v.each_with_index do |new,i|
            next if new.empty?
            current[i].concat(new)
          end
        else
          # Start from the stored lists so that positions with nothing new to
          # add keep their values instead of being dropped
          merged = current.dup
          v.each_with_index do |new,i|
            next if new.empty?
            merged[i] = (current[i] || []) + new
          end
          res[k] = merged
        end
      elsif res.type == :flat
        if merge == :concat
          res[k] ||= []
          res[k].concat v
        else
          res[k] = res[k].nil? ? v : res[k] + v
        end
      else
        res[k] = v
      end
    end
  end

  res.key_field = key_field_name
  res.fields = field_names
  res
end

#reset_filters ⇒ Object



292
293
294
295
296
297
298
299
300
301
# File 'lib/scout/tsv/util/filter.rb', line 292

# Clears the state of the filters.
#
# With a non-empty @filter_dir every "*.filter" file directly inside it is
# removed and the list of matched paths is returned. Otherwise each entry of
# @filters (when it is an Array) is asked to reset and nil is returned.
def reset_filters
  unless @filter_dir.nil? || @filter_dir.empty?
    return Dir.glob(File.join(@filter_dir, '*.filter')).each { |path| FileUtils.rm path }
  end

  @filters.each(&:reset) if Array === @filters
  nil
end

#select(method = nil, invert = false, &block) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# File 'lib/scout/tsv/util/select.rb', line 68

# Returns a new TSV holding the entries that match a criterion.
#
# method - the criterion:
#          nil + block    - keep entries for which block(key, values) is truthy
#          Array          - keep entries whose key or any value is in the list
#          Regexp         - keep entries whose key or any value matches
#          String/Symbol  - without a block, keep entries whose key or any
#                           value equals it; with a block it names the field
#                           (or :key / the key field) whose value is yielded:
#                           block(value) for arity 1, block(key, value) for 2
#          Hash           - only the first pair {field => condition} is used;
#                           the condition may be an Array (membership), a
#                           Regexp, "name:<text>" or "name:/<regexp>/" (matches
#                           on the name of the values), a comparison such as
#                           ">5" or "<=0.1" (numeric, on the first value), any
#                           other String (equality), a Numeric (minimum number
#                           of values) or a Proc (called on each value)
# invert - when true the entries that do NOT match are kept
#
# Returns the new TSV, annotated like this one.
def select(method = nil, invert = false, &block)
  new = TSV.setup({}, :key_field => key_field, :fields => fields, :type => type, :filename => filename, :identifiers => identifiers)

  self.annotate(new)

  case
  when (method.nil? and block_given?)
    through do |key, values|
      new[key] = values if invert ^ (yield key, values)
    end
  when Array === method
    method = Set.new method
    with_unnamed do
      case type
      when :single
        through do |key, value|
          new[key] = value if invert ^ (method.include? key or method.include? value)
        end
      when :list, :flat
        through do |key, values|
          new[key] = values if invert ^ (method.include? key or (method & values).length > 0)
        end
      else
        through do |key, values|
          new[key] = values if invert ^ (method.include? key or (method & values.flatten).length > 0)
        end
      end
    end
  when Regexp === method
    with_unnamed do
      through do |key, values|
        new[key] = values if invert ^ ([key,values].flatten.select{|v| v =~ method}.any?)
      end
    end
  when ((String === method) || (Symbol === method))
    if block_given?
      case
      when block.arity == 1
        with_unnamed do
          case
          when (method == key_field or method == :key)
            through do |key, values|
              new[key] = values if invert ^ (yield(key))
            end
          when (type == :single or type == :flat)
            through do |key, value|
              new[key] = value if invert ^ (yield(value))
            end
          else
            pos = identify_field method
            raise "Field #{ method } not identified. Available: #{ fields * ", " }" if pos.nil?

            through do |key, values|
              new[key] = values if invert ^ (yield(values[pos]))
            end
          end
        end
      when block.arity == 2
        with_unnamed do
          case
          when (method == key_field or method == :key)
            through do |key, values|
              new[key] = values if invert ^ (yield(key, key))
            end
          when (type == :single or type == :flat)
            through do |key, value|
              new[key] = value if invert ^ (yield(key, value))
            end
          else
            pos = identify_field method
            through do |key, values|
              new[key] = values if invert ^ (yield(key, values[pos]))
            end
          end

        end
      end

    else
      with_unnamed do
        through do |key, values|
          new[key] = values if invert ^ ([key,values].flatten.select{|v| v == method}.any?)
        end
      end
    end
  when Hash === method
    # Only the first {field => condition} pair is considered
    key  = method.keys.first
    method = method.values.first
    case
    when ((Array === method) and (key == :key or key_field == key))
      with_unnamed do
        if invert
          keys.each do |key|
            new[key] = self[key] unless method.include?(key)
          end
        else
          method.each do |key|
            new[key] = self[key] if self.include?(key)
          end
        end
      end
    when Array === method
      with_unnamed do
        method = Set.new method unless Set === method
        case type
        when :single
          through :key, key do |key, value|
            new[key] = self[key] if invert ^ (method.include? value)
          end
        when :list
          through :key, key do |key, values|
            new[key] = self[key] if invert ^ (method.include? values.first)
          end
        when :flat #untested
          through :key, key do |key, values|
            new[key] = self[key] if invert ^ ((method & values.flatten).any?)
          end
        else
          through :key, key do |key, values|
            new[key] = self[key] if invert ^ ((method & values.flatten).any?)
          end
        end
      end

    when Regexp === method
      with_unnamed do
        through :key, key do |key, values|
          values = [values] if type == :single
          new[key] = self[key] if invert ^ (values.flatten.select{|v| v =~ method}.any?)
        end
      end

    when ((String === method) and (method =~ /name:(.*)/))
      name = $1
      old_unnamed = self.unnamed
      self.unnamed = false
      if name.strip =~ /^\/(.*)\/$/
        regexp = Regexp.new $1
        through :key, key do |key, values|
          case type
          when :single
            values = values.annotate([values])
          when :double
            values = values[0]
          end
          new[key] = self[key] if invert ^ (values.select{|v| v.name =~ regexp}.any?)
        end
      else
        through :key, key do |key, values|
          case type
          when :single
            values = values.annotate([values])
          when :double
            values = values[0]
          end
          new[key] = self[key] if invert ^ (values.select{|v| v.name == name}.any?)
        end
      end
      self.unnamed = old_unnamed

    when String === method
      if method =~ /^([<>]=?)(.*)/
        # Capture the operator and threshold right away instead of reading
        # $1/$2 inside the traversal blocks
        operator = $1
        threshold = $2.to_f
        with_unnamed do
          through :key, key do |key, values|
            value = Array === values ? values.flatten.first : values
            # Honour +invert+ here as every other branch does
            new[key] = self[key] if invert ^ value.to_f.send(operator, threshold)
          end
        end
      else
        with_unnamed do
          through :key, key do |key, values|
            values = [values] if type == :single
            new[key] = self[key] if invert ^ (values.flatten.select{|v| v == method}.length > 0)
          end
        end
      end
    when Numeric === method
      with_unnamed do
        through :key, key do |key, values|
          new[key] = self[key] if invert ^ (values.flatten.length >= method)
        end
      end
    when Proc === method
      with_unnamed do
        through :key, key do |key, values|
          values = [values] if type == :single
          new[key] = self[key] if invert ^ (values.flatten.select{|v| method.call(v)}.length > 0)
        end
      end
    end
  end
  new
end

#slice(fields, **kwargs) ⇒ Object



43
44
45
# File 'lib/scout/tsv/util/reorder.rb', line 43

# Keeps the current key and only the given fields: calls #reorder with :key
# as the key field, forwarding +fields+ and all keyword arguments.
def slice(fields, **kwargs)
  reorder :key, fields, **kwargs
end

#sort(field = nil, just_keys = false, &block) ⇒ Object



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/scout/tsv/util/sort.rb', line 83

# Sorts the entries.
#
# field     - field whose first value is used for sorting; nil or :all sorts
#             by key when no block is given
# just_keys - when true return only the sorted keys (passed through
#             prepare_entity unless @unnamed), otherwise [key, values] pairs
# block     - optional comparison block handed to Array#sort; it receives
#             [key, value] pairs
#
# Without a block and with a specific field, nil and empty values sort first;
# Array values of length 1 compare by their only element and other Arrays by
# length; anything else compares with <=>.
def sort(field = nil, just_keys = false, &block)
  field = :all if field.nil?

  if field == :all
    elems = collect
  else
    elems = []
    case type
    when :single
      through :key, field do |key, value|
        elems << [key, value]
      end
    when :list, :flat, :double
      through :key, field do |key, values|
        elems << [key, values[0]]
      end
    end
  end

  if not block_given?
    # Compare the requested +field+ (not the TSV's #fields) against :all
    if field == :all
      if just_keys
        keys = elems.sort_by{|key, value| key }.collect{|key, values| key}
        keys = prepare_entity(keys, key_field, (entity_options || {}).merge(:dup_array => true)) unless @unnamed
        keys
      else
        elems.sort_by{|key, value| key }
      end
    else
      sorted = elems.sort do |a, b|
        a_value = a.last
        b_value = b.last
        # `||`/`&&` rather than `or`/`and`: with the latter the assignment
        # binds first and the empty? check is discarded
        a_empty = a_value.nil? || (a_value.respond_to?(:empty?) && a_value.empty?)
        b_empty = b_value.nil? || (b_value.respond_to?(:empty?) && b_value.empty?)
        case
        when (a_empty and b_empty)
          0
        when a_empty
          -1
        when b_empty
          1
        when Array === a_value
          if a_value.length == 1 and b_value.length == 1
            a_value.first <=> b_value.first
          else
            a_value.length <=> b_value.length
          end
        else
          a_value <=> b_value
        end
      end
      if just_keys
        keys = sorted.collect{|key, value| key}
        keys = prepare_entity(keys, key_field, (entity_options || {}).merge(:dup_array => true)) unless @unnamed
        keys
      else
        sorted.collect{|key, value| [key, self[key]]}
      end
    end
  else
    if just_keys
      keys = elems.sort(&block).collect{|key, value| key}
      keys = prepare_entity(keys, key_field, (entity_options || {}).merge(:dup_array => true)) unless @unnamed
      keys
    else
      elems.sort(&block).collect{|key, value| [key, self[key]]}
    end
  end
end

#sort_by(field = nil, just_keys = false, &block) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/scout/tsv/util/sort.rb', line 8

# Sorts the entries by one field.
#
# field     - field whose first value is used for sorting (:key uses the key
#             itself for :list, :flat and :double TSVs); nil or :all sorts by
#             key when no block is given
# just_keys - when true return only the sorted keys (passed through
#             prepare_entity unless @unnamed), otherwise [key, values] pairs
# block     - optional block handed to Array#sort_by; it receives
#             [key, value] pairs
#
# Without a block and with a specific field, nil and empty values sort first;
# Array values of length 1 compare by their only element and other Arrays by
# length; anything else compares with <=>.
def sort_by(field = nil, just_keys = false, &block)
  field = :all if field.nil?

  if field == :all
    elems = collect
  else
    elems = []
    case type
    when :single
      through :key, field do |key, value|
        elems << [key, value]
      end
    when :list, :flat, :double
      through :key, field do |key, value|
        v = field == :key ? key : value[0]
        elems << [key, v]
      end
    end
  end

  if not block_given?
    # Compare the requested +field+ (not the TSV's #fields) against :all
    if field == :all
      if just_keys
        keys = elems.sort_by{|key, value| key }.collect{|key, values| key}
        keys = prepare_entity(keys, key_field, (entity_options || {}).merge(:dup_array => true)) unless @unnamed
        # Explicit return value: the modifier-unless above yields nil when @unnamed
        keys
      else
        elems.sort_by{|key, value| key }
      end
    else
      sorted = elems.sort do |a, b|
        a_value = a.last
        b_value = b.last
        # `||`/`&&` rather than `or`/`and`: with the latter the assignment
        # binds first and the empty? check is discarded
        a_empty = a_value.nil? || (a_value.respond_to?(:empty?) && a_value.empty?)
        b_empty = b_value.nil? || (b_value.respond_to?(:empty?) && b_value.empty?)
        case
        when (a_empty and b_empty)
          0
        when a_empty
          -1
        when b_empty
          1
        when Array === a_value
          if a_value.length == 1 and b_value.length == 1
            a_value.first <=> b_value.first
          else
            a_value.length <=> b_value.length
          end
        else
          a_value <=> b_value
        end
      end
      if just_keys
        keys = sorted.collect{|key, value| key}
        keys = prepare_entity(keys, key_field, (entity_options || {}).merge(:dup_array => true)) unless @unnamed
        keys
      else
        sorted.collect{|key, value| [key, self[key]]}
      end
    end
  else
    if just_keys
      keys = elems.sort_by(&block).collect{|key, value| key}
      keys = prepare_entity(keys, key_field, (entity_options || {}).merge(:dup_array => true)) unless @unnamed
      keys
    else
      elems.sort_by(&block).collect{|key, value| [key, self[key]]}
    end
  end
end

#subset(keys) ⇒ Object



262
263
264
265
266
267
268
269
270
# File 'lib/scout/tsv/util/select.rb', line 262

# Returns a new annotated copy limited to the given keys. Keys that are not
# present in this TSV are ignored. Values are read with naming disabled.
def subset(keys)
  selection = self.annotate({})
  self.with_unnamed do
    keys.each { |key| selection[key] = self[key] if self.include?(key) }
  end
  selection
end

#summary ⇒ Object



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/scout/tsv/util.rb', line 121

# Builds a multi-line, human-readable description of this TSV: file name, key
# field, fields, type, size, namespace, identifiers and the first entry as an
# example.
#
# The file name shown is "No filename" when @filename is nil or an empty
# String. When the object responds to persistence_path and has one, the base
# names of both the file and the persistence path are shown.
#
# Returns the description as a String.
def summary
  key = nil
  values = nil
  # Grab the first entry only
  self.each do |k, v|
    key = k
    values = v
    break
  end

  filename = @filename
  filename = "No filename" if filename.nil? || String === filename && filename.empty?
  # NOTE(review): the result of find is discarded — confirm whether
  # `filename = filename.find` was intended
  filename.find if Path === filename 
  filename = File.basename(filename) + " [" + File.basename(persistence_path) + "]" if respond_to?(:persistence_path) and persistence_path

  with_unnamed do
    <<-EOF
Filename = #{filename}
Key field = #{key_field || "*No key field*"}
Fields = #{fields ? Log.fingerprint(fields) : "*No field info*"}
Type = #{type}
Size = #{size}
namespace = #{Log.fingerprint namespace}
identifiers = #{Log.fingerprint identifiers}
Example:
- #{key} -- #{Log.fingerprint values }
    EOF
  end
end

#to_double ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/scout/tsv/transformer.rb', line 136

# Converts to a :double TSV (every field holds a list of values).
#
# Returns self when the type is already :double. Otherwise a new annotated
# copy is filled through a Transformer: a :single value v becomes [[v]], a
# :list entry wraps each value in its own list, and a :flat entry becomes a
# single field holding the whole list.
def to_double
  return self if self.type == :double

  doubled = self.annotate({})
  self.with_unnamed do
    converter = Transformer.new(self, doubled)
    converter.type = :double
    converter.traverse do |key, entry|
      case self.type
      when :single then [key, [[entry]]]
      when :list then [key, entry.map { |value| [value] }]
      when :flat then [key, [entry]]
      end
    end
  end
  doubled
end

#to_flat ⇒ Object



169
170
171
172
173
174
175
176
177
178
# File 'lib/scout/tsv/transformer.rb', line 169

# Converts to a :flat TSV: a new annotated copy is filled through a
# Transformer in which Array entries are flattened into a single list and
# any other value is wrapped in a one-element list.
def to_flat
  flat = self.annotate({})
  converter = Transformer.new(self, flat)
  converter.type = :flat
  converter.traverse do |key, entry|
    [key, Array === entry ? entry.flatten : [entry]]
  end
  flat
end

#to_hash ⇒ Object



141
142
143
# File 'lib/scout/tsv.rb', line 141

# Returns a duplicate of this object (self.dup).
def to_hash
  self.dup
end

#to_list ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/scout/tsv/transformer.rb', line 117

# Converts to a :list TSV (one value per field).
#
# A new annotated copy is filled through a Transformer: a :single value v
# becomes [v], a :double entry keeps the first value of every field, and a
# :flat entry keeps only its first value. No conversion is defined here for
# any other type, for which the block yields nil.
def to_list
  listed = self.annotate({})
  self.with_unnamed do
    converter = Transformer.new(self, listed)
    converter.type = :list
    converter.traverse do |key, entry|
      case self.type
      when :single then [key, [entry]]
      when :double then [key, entry.map { |values| values.first }]
      when :flat then [key, entry.slice(0, 1)]
      end
    end
  end
  listed
end

#to_s(options = {}) ⇒ Object



224
225
226
# File 'lib/scout/tsv/dumper.rb', line 224

# Dumps the TSV by calling dumper_stream with an empty String as the default
# :stream; any entry in +options+ (including :stream) takes precedence.
# Returns whatever dumper_stream returns.
def to_s(options = {})
  dumper_stream({stream: ''}.merge(options))
end

#to_single ⇒ Object



157
158
159
160
161
162
163
164
165
166
167
# File 'lib/scout/tsv/transformer.rb', line 157

# Converts to a :single TSV: a new annotated copy is filled through an
# unnamed Transformer in which every entry is reduced to its first scalar,
# taking #first repeatedly while the value is still an Array.
def to_single
  single = self.annotate({})
  converter = Transformer.new(self, single)
  converter.type = :single
  converter.unnamed = true
  converter.traverse do |key, entry|
    scalar = entry
    scalar = scalar.first while Array === scalar
    [key, scalar]
  end
  single
end

#translate(*args, **kwargs) ⇒ Object



152
153
154
# File 'lib/scout/tsv/change_id/translate.rb', line 152

# Instance-level shortcut for TSV.translate: this object is passed as the
# first argument and all other arguments are forwarded unchanged.
def translate(*args, **kwargs)
  TSV.translate(self, *args, **kwargs)
end

#transpose(key_field = "Unkown ID") ⇒ Object



81
82
83
84
85
86
87
88
89
90
# File 'lib/scout/tsv/util/reorder.rb', line 81

# Transposes the table, choosing the strategy from the current type:
# :single and :flat are first converted with to_list and then transposed as
# a list, :list uses transpose_list and :double uses transpose_double.
# Returns nil for any other type.
#
# key_field - name given to the key field of the transposed table
def transpose(key_field = "Unkown ID")
  kind = type
  if :single == kind || :flat == kind
    self.to_list.transpose_list key_field
  elsif :list == kind
    transpose_list key_field
  elsif :double == kind
    transpose_double key_field
  end
end

#transpose_double(key_field = "Unkown ID") ⇒ Object



74
75
76
77
78
79
# File 'lib/scout/tsv/util/reorder.rb', line 74

# Transposes a :double table by going through a :list representation:
# convert with to_list, transpose with transpose_list, convert back with
# to_double.
#
# key_field - name given to the key field of the transposed table
#
# NOTE(review): the blocks given to to_list and to_double are meant to join
# each value list with a random separator and split it again afterwards, but
# the to_list / to_double definitions visible in this file take no block, so
# they appear to be ignored and only the first value of each field would
# survive — confirm.
def transpose_double(key_field = "Unkown ID")
  sep = "-!SEP--#{rand 10000}!-"
  tmp = self.to_list{|v| v * sep}
  new = tmp.transpose_list(key_field)
  new.to_double{|v| v.split(sep)}
end

#transpose_list(key_field = "Unkown ID") ⇒ Object



59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/scout/tsv/util/reorder.rb', line 59

# Transposes a :list table: the current keys become the fields and the
# current fields become the keys. The values are transposed as a Matrix.
#
# key_field - name given to the key field of the transposed table
#
# Returns the new TSV, set up with the same type, filename and identifiers.
def transpose_list(key_field="Unkown ID")
  column_names = keys.dup
  transposed = self.annotate({})
  TSV.setup(transposed, :key_field => key_field, :fields => column_names, :type => type, :filename => filename, :identifiers => identifiers)

  rows = Matrix.rows(values).transpose.to_a
  fields.zip(rows) { |field_name, row| transposed[field_name] = row }

  transposed
end

#traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed: nil, key_field: nil, fields: nil, bar: false, cast: nil, select: nil, uniq: false, &block) ⇒ Object Also known as: through



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/scout/tsv/traverse.rb', line 3

# Iterates over the entries, optionally re-keying them by another field,
# restricting them to a selection of fields and converting them to another
# type, yielding (key, values) for each resulting entry.
#
# key_field_pos / key_field - field to use as key (default :key, the current key)
# fields_pos / fields       - fields to include; nil keeps all, :all means
#                             key field plus fields
# type    - type of the yielded values (defaults to @type)
# one2one - for :double sources re-keyed by a multi-valued field: pair the
#           i-th key with the i-th value of each field; unless :strict,
#           missing positions fall back to the first value
# unnamed - naming mode used during the traversal (defaults to @unnamed, or
#           false when that is nil)
# bar     - progress bar setting; true uses the log message as description
# cast    - when set, values are passed through TSV.cast_value
# select  - when set, entries are filtered with TSV.select; an :invert entry
#           is removed from a Hash +select+ and passed on separately
# uniq    - when true, keys obtained from a multi-valued field are made unique
#
# Returns [key_name, field_names] describing the yielded entries.
def traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed: nil, key_field: nil, fields: nil, bar: false, cast: nil, select: nil, uniq: false, &block)
  key_field = key_field_pos if key_field.nil?
  fields = fields_pos.dup if fields.nil?
  type = @type if type.nil?
  key_pos = self.identify_field(key_field)
  fields = self.all_fields if fields == :all
  fields = [fields] unless fields.nil? || Array === fields
  positions = (fields.nil? || fields == :all) ? nil : self.identify_field(fields)
  # Asking for exactly the current fields is the same as asking for nothing
  positions = nil if fields == self.fields
  unnamed = @unnamed if unnamed.nil?
  unnamed = false if unnamed.nil?

  if key_pos == :key
    key_name = @key_field
  else
    key_name = @fields[key_pos]
    if positions.nil?
      # Re-keying without a field selection: the old key goes first, followed
      # by every field except the one that becomes the key
      positions = (0..@fields.length-1).to_a
      positions.delete_at key_pos
      positions.unshift :key
    end
  end

  fields = positions.collect{|p| p == :key ? self.key_field : self.fields[p] } if positions

  if positions.nil? && key_pos == :key
    field_names = @fields.dup
  elsif positions.nil? && key_pos != :key
    field_names = @fields.dup
    field_names.delete_at key_pos unless fields == :all
  elsif positions.include?(:key)
    field_names = positions.collect{|p| p == :key ? @key_field : @fields[p] }
  else
    field_names = @fields.values_at *positions
  end

  # Remember where the original key must be re-inserted among the values
  key_index = positions.index :key if positions
  positions.delete :key if positions

  log_message = "Traverse #{Log.fingerprint self}"
  Log.debug log_message
  bar = log_message if TrueClass === bar

  invert = select.delete :invert if Hash === select
  # "<target type>_<source type>", used below to pick the conversion
  type_swap_tag = [type.to_s, @type.to_s] * "_"
  Log::ProgressBar.with_obj_bar(self, bar) do |bar|
    with_unnamed unnamed do
      each do |key,values|
        next unless TSV.select key, values, select, invert: invert if select
        bar.tick if bar
        values = [values] if @type == :single
        if positions.nil?
          if key_pos != :key
            values = values.dup
            if @type == :flat
              key = values
            else
              key = values.delete_at(key_pos)
            end
          end
        else 
          orig_key = key
          key = @type == :flat ? values : values[key_pos] if key_pos != :key 

          values = values.values_at(*positions)
          if key_index
            if @type == :double
              values.insert key_index, [orig_key]
            else
              values.insert key_index, orig_key
            end
          end
        end

        if ! unnamed && fields
          case @type
          when :flat, :single
            values = Entity.prepare_entity(values, fields.first)
          else
            values = NamedArray.setup(values, fields, entity_options)
          end
        end

        values = TSV.cast_value(values, cast) if cast

        # A multi-valued key yields one entry per key value
        if Array === key 
          key = key.uniq if uniq
          if @type == :double && one2one
            if one2one == :strict
              key.each_with_index do |key_i,i|
                if type == :double
                  v_i = values.collect{|v| [v[i]] }
                else
                  v_i = values.collect{|v| v[i] }
                end
                yield key_i, v_i
              end
            else
              key.each_with_index do |key_i,i|
                if type == :double
                  v_i = values.collect{|v| [v[i] || v.first] }
                else
                  v_i = values.collect{|v| v[i] || v.first }
                end
                yield key_i, v_i, @fields
              end
            end
          else
            key.each_with_index do |key_i, i|
              if type == :double
                yield key_i, values
              elsif type == :list
                yield key_i, values.collect{|v| v[i] }
              elsif type == :flat
                yield key_i, values.flatten
              elsif type == :single
                yield key_i, values.first
              end
            end
          end
        else
          if type == @type
            if type == :single
              yield key, values.first
            else
              yield key, values
            end
          else
            case type_swap_tag
            when "double_list"
              yield key, values.collect{|v| [v] }
            when "double_flat"
              yield key, [values]
            when "double_single"
              yield key, [values]
            when "list_double"
              yield key, values.collect{|v| v.first }
            when "list_flat"
              yield key, [values.first]
            when "list_single"
              yield key, values
            when "flat_double"
              yield key, values.flatten
            when "flat_list"
              yield key, values.flatten
            when "flat_single"
              yield key, values
            when "single_double"
              yield key, values.flatten.first
            when "single_list"
              yield key, values.first
            when "single_flat"
              yield key, values.first
            end
          end
        end
      end
    end
  end
  

  [key_name, field_names]
end

#unzip(*args, **kwargs) ⇒ Object



83
84
85
# File 'lib/scout/tsv/util/unzip.rb', line 83

# Instance-level convenience for TSV.unzip: calls it with this object as the
# first argument and forwards every positional and keyword argument unchanged.
# Returns whatever TSV.unzip returns.
def unzip(*args, **kwargs)
  TSV.unzip(self, *args, **kwargs)
end

#unzip_replicates ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/scout/tsv/util/unzip.rb', line 87

# Splits every entry of a :double TSV into one entry per position of its
# zipped values. Each new key is the original key followed by "(<index>)",
# and the result is annotated from this TSV and marked as type :list.
#
# Raises a RuntimeError when this TSV is not of type :double.
def unzip_replicates
  raise "Can only unzip replicates in :double TSVs" unless type == :double

  replicates = {}
  with_unnamed do
    through do |key, value_lists|
      zipped = NamedArray.zip_fields(value_lists)
      zipped.each_with_index do |replicate, index|
        suffix = "(#{index})"
        replicates[key + suffix] = replicate
      end
    end
  end

  # Annotate first so the type set below is not overwritten by annotate.
  annotate(replicates)
  replicates.type = :list
  replicates
end

#with_filters(filters, &block) ⇒ Object



303
304
305
306
307
308
309
310
# File 'lib/scout/tsv/util/filter.rb', line 303

# Runs the given block while +filters+ are applied, then resets the filters,
# even when adding a filter or the block itself raises.
#
# filters - a collection of [field, value] pairs; each pair is passed to
#           add_filter.
# block   - code to run once all filters have been added.
#
# Returns the value of the block, or nil when no block is given.
def with_filters(filters, &block)
  filter
  begin
    filters.each{|field,value| add_filter field, value }
    # Previously the block was accepted but never called, so the filters were
    # added and immediately reset without any code running under them.
    yield if block_given?
  ensure
    reset_filters
  end
end

#with_unnamed(unnamed = nil) ⇒ Object



110
111
112
113
114
115
116
117
118
119
# File 'lib/scout/tsv/util.rb', line 110

# Runs the block with @unnamed temporarily set to +unnamed+ (true when the
# argument is omitted or nil). The previous value of @unnamed is restored
# afterwards, even when the block raises.
#
# Returns the value of the block.
def with_unnamed(unnamed = nil)
  previous = @unnamed
  @unnamed = unnamed.nil? ? true : unnamed
  yield
ensure
  @unnamed = previous
end

#write_file(file) ⇒ Object



230
231
232
233
234
# File 'lib/scout/tsv/dumper.rb', line 230

# Opens +file+ through Open.open in 'w' mode and hands the resulting stream
# to dumper_stream as its :stream option.
def write_file(file)
  Open.open(file, mode: 'w') { |io| dumper_stream(stream: io) }
end

#zip(merge = false, field = "New Field", sep = ":") ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/scout/tsv/util/unzip.rb', line 105

# Builds a new TSV in which each key is split on +sep+: the first part
# becomes the new key and the second part is appended as an extra value.
#
# merge - when true the result is typed :double and entries sharing the same
#         new key are combined position by position; when false a later
#         entry with the same new key overwrites an earlier one.
# field - name appended to the field list for the extra value.
# sep   - separator used to split keys and the key field name.
#
# When both key_field and fields are set, the result's key_field is the part
# of this TSV's key_field before +sep+ and +field+ is appended to its fields.
#
# Returns the new annotated TSV. This TSV's values are not modified.
def zip(merge = false, field = "New Field", sep = ":")
  new = {}
  self.annotate new

  new.type = :double if merge

  new.with_unnamed do
    if merge
      self.through do |key,values|
        new_key, new_value = key.split(sep)
        # One copy of the key suffix per element of the first value list.
        extra = [new_value] * values.first.length
        if new.include? new_key
          incoming = values + [extra]
          current = new[new_key]
          current.each_with_index do |v,i|
            v.concat(incoming[i])
          end
        else
          # Copy the inner arrays: they are extended in place by later
          # entries with the same key, and storing the originals would
          # silently modify the values of the TSV being zipped.
          new[new_key] = values.collect{|v| v.dup } + [extra]
        end
      end
    else
      self.through do |key,values|
        new_key, new_value = key.split(sep)
        new[new_key] = values + [new_value]
      end
    end
  end

  if self.key_field and self.fields
    new.key_field = self.key_field.partition(sep).first
    new.fields = new.fields + [field]
  end

  new
end

#zip_new(key, values, insitu: :lax) ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/scout/tsv/util.rb', line 67

# Adds +values+ to the entry stored under +key+, zipping them onto any
# existing value through NamedArray.add_zipped.
#
# values - list of per-field values; when its first element is not an Array,
#          every non-Array element is wrapped in a one-element Array.
# insitu - :lax (default): zip onto the stored value and reassign the result;
#          other truthy value: zip onto the stored value without reassigning,
#          and store a shallow copy of +values+ for a new key;
#          falsy: zip onto a shallow copy of the stored value and reassign.
#
# Returns the value that was stored, or the result of
# NamedArray.add_zipped in the strict in-situ case.
def zip_new(key, values, insitu: :lax)
  values = values.collect{|v| Array === v ? v : [v] } unless Array === values.first

  strict = insitu && insitu != :lax
  existing = self[key]

  # No previous entry: store the values, copying only in strict mode.
  unless existing
    fresh = strict ? values.dup : values
    self[key] = fresh
    return fresh
  end

  if strict
    NamedArray.add_zipped(existing, values)
  else
    target = insitu == :lax ? existing : existing.dup
    self[key] = NamedArray.add_zipped(target, values)
  end
end