Module: TSV
- Extended by:
- Annotation
- Defined in:
- lib/scout/tsv.rb,
lib/scout/tsv/csv.rb,
lib/scout/tsv/open.rb,
lib/scout/tsv/util.rb,
lib/scout/tsv/index.rb,
lib/scout/tsv/attach.rb,
lib/scout/tsv/dumper.rb,
lib/scout/tsv/parser.rb,
lib/scout/tsv/stream.rb,
lib/scout/tsv/traverse.rb,
lib/scout/tsv/change_id.rb,
lib/scout/tsv/util/melt.rb,
lib/scout/tsv/util/sort.rb,
lib/scout/tsv/util/unzip.rb,
lib/scout/tsv/transformer.rb,
lib/scout/tsv/util/filter.rb,
lib/scout/tsv/util/select.rb,
lib/scout/association/item.rb,
lib/scout/tsv/util/process.rb,
lib/scout/tsv/util/reorder.rb,
lib/scout/tsv/change_id/translate.rb
Defined Under Namespace
Classes: Dumper, Parser, Transformer
Constant Summary collapse
- KEY_PARAMETERS =
# Names of the optional keyword parameters (those reported with type :key)
# declared by parse_line and parse_stream, in declaration order.
(method(:parse_line).parameters + method(:parse_stream).parameters)
  .select { |type, _name| type == :key }
  .map { |_type, name| name }
Class Method Summary collapse
- .acceptable_parser_options(func = nil) ⇒ Object
- .all_fields(file) ⇒ Object
- .attach(source, other, target: nil, fields: nil, index: nil, identifiers: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil) ⇒ Object
- .cast_value(value, cast) ⇒ Object
- .change_id(source, source_id, new_id, identifiers: nil, one2one: false, insitu: false) ⇒ Object
- .change_key(source, new_key_field, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_identifiers: nil) ⇒ Object
- .collapse_stream(stream, *args, **kwargs, &block) ⇒ Object
- .concat_streams(streams) ⇒ Object
- .csv(obj, options = {}) ⇒ Object
- .field_match_counts(file, values, options = {}) ⇒ Object
- .identifier_files(obj) ⇒ Object
- .identify_field(key_field, fields, name, strict: nil) ⇒ Object
- .identify_field_in_obj(obj, field) ⇒ Object
- .incidence(tsv, **kwargs) ⇒ Object
- .index(tsv_file, target: :key, fields: nil, order: true, bar: nil, **kwargs) ⇒ Object
- .match_keys(source, other, match_key: nil, other_key: nil) ⇒ Object
- .open(file, options = {}) ⇒ Object
- .original_setup ⇒ Object
- .parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: nil, serializer: nil, **kwargs, &block) ⇒ Object
- .parse_header(stream, fix: true, header_hash: '#', sep: "\t") ⇒ Object
- .parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil, select: nil, field_names: nil) ⇒ Object
- .parse_options ⇒ Object
- .parse_stream(stream, data: nil, source_type: nil, sep: "\t", type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, head: nil, **kwargs, &block) ⇒ Object
- .paste_streams(streams, type: nil, sort: nil, sort_cmd_args: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, one2one: true, field_prefix: nil) ⇒ Object
- .pos_index(tsv_file, pos_field = nil, key_field: :key, bar: nil, **kwargs) ⇒ Object
- .process_stream(stream, header_hash: "#", &block) ⇒ Object
- .range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, bar: nil, **kwargs) ⇒ Object
- .select(key, values, method, fields: nil, field: nil, invert: false, type: nil, sep: nil, &block) ⇒ Object
- .select_prefix_str(select) ⇒ Object
- .setup(obj, *rest, &block) ⇒ Object
- .str2options(str) ⇒ Object
- .str_setup(option_str, obj) ⇒ Object
- .translate(tsv, field, format, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_index: true) ⇒ Object
- .translation_index(files, source, target, persist_options = {}) ⇒ Object
- .translation_path(file_fields, source, target) ⇒ Object
- .traverse(*args, **kwargs, &block) ⇒ Object
- .unzip(source, field, target: nil, sep: ":", delete: true, type: :list, merge: false, one2one: true, bar: nil) ⇒ Object
Instance Method Summary collapse
- #[](key, *rest) ⇒ Object
- #add_field(name = nil) ⇒ Object
- #all_fields ⇒ Object
- #attach(*args, **kwargs) ⇒ Object
- #change_id(*args, **kwargs) ⇒ Object
- #change_key(*args, **kwargs) ⇒ Object
- #chunked_values_at(keys, max = 5000) ⇒ Object
- #collapse_stream(*args, **kwargs, &block) ⇒ Object
- #collect(*args, &block) ⇒ Object
- #column(field, **kwargs) ⇒ Object
- #digest_str ⇒ Object
- #dumper_stream(options = {}) ⇒ Object (also: #stream)
- #each(*args, &block) ⇒ Object
- #filter(filter_dir = nil) ⇒ Object
- #fingerprint ⇒ Object
- #head(max = 10) ⇒ Object
- #identifier_files ⇒ Object
- #identify_field(name, strict: nil) ⇒ Object
- #index(*args, **kwargs, &block) ⇒ Object
- #inspect ⇒ Object
- #melt_columns(value_field, column_field) ⇒ Object
- #merge(other) ⇒ Object
- #merge_zip(other) ⇒ Object
- #options ⇒ Object
- #page(pnum, psize, field = nil, just_keys = false, reverse = false, &block) ⇒ Object
- #pos_index(*args, **kwargs, &block) ⇒ Object
- #prepare_entity ⇒ Object
- #process(field, &block) ⇒ Object
- #range_index(*args, **kwargs, &block) ⇒ Object
- #remove_duplicates(pivot = 0) ⇒ Object
- #reorder(key_field = nil, fields = nil, merge: true, one2one: true, data: nil, unnamed: true, **kwargs) ⇒ Object
- #reset_filters ⇒ Object
- #select(method = nil, invert = false, &block) ⇒ Object
- #slice(fields, **kwargs) ⇒ Object
- #sort(field = nil, just_keys = false, &block) ⇒ Object
- #sort_by(field = nil, just_keys = false, &block) ⇒ Object
- #subset(keys) ⇒ Object
- #summary ⇒ Object
- #to_double ⇒ Object
- #to_flat ⇒ Object
- #to_hash ⇒ Object
- #to_list ⇒ Object
- #to_s(options = {}) ⇒ Object
- #to_single ⇒ Object
- #translate(*args, **kwargs) ⇒ Object
- #transpose(key_field = "Unkown ID") ⇒ Object
- #transpose_double(key_field = "Unkown ID") ⇒ Object
- #transpose_list(key_field = "Unkown ID") ⇒ Object
- #traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed: nil, key_field: nil, fields: nil, bar: false, cast: nil, select: nil, uniq: false, &block) ⇒ Object (also: #through)
- #unzip(*args, **kwargs) ⇒ Object
- #unzip_replicates ⇒ Object
- #with_filters(filters, &block) ⇒ Object
- #with_unnamed(unnamed = nil) ⇒ Object
- #write_file(file) ⇒ Object
- #zip(merge = false, field = "New Field", sep = ":") ⇒ Object
- #zip_new(key, values, insitu: :lax) ⇒ Object
Methods included from Annotation
list_tsv_values, load_info, load_tsv, load_tsv_values, obj_tsv_values, resolve_tsv_array, tsv
Class Method Details
.acceptable_parser_options(func = nil) ⇒ Object
3 4 5 6 7 8 9 10 11 |
# File 'lib/scout/tsv/parser.rb', line 3

# Lists the parameter names accepted by the TSV parsing entry points.
#
# @param func [Symbol, nil] name of a single TSV class method to inspect;
#   when nil, parse_line, parse_stream and parse are combined and the
#   :line and :block names are removed.
# @return [Array<Symbol>] parameter names without duplicates.
def self.acceptable_parser_options(func = nil)
  names =
    if func.nil?
      TSV.method(:parse_line).parameters.collect{|a| a.last } +
        TSV.method(:parse_stream).parameters.collect{|a| a.last } +
        TSV.method(:parse).parameters.collect{|a| a.last } - [:line, :block]
    else
      TSV.method(func).parameters.collect{|a| a.last }
    end

  names.uniq
end
.all_fields(file) ⇒ Object
155 156 157 158 159 160 161 |
# File 'lib/scout/tsv/util.rb', line 155

# Returns the "all_fields" entry for +file+: asks the object itself when it
# responds to #all_fields, otherwise reads it from the parsed header.
def self.all_fields(file)
  return file.all_fields if file.respond_to?(:all_fields)

  TSV.parse_header(file)["all_fields"]
end
.attach(source, other, target: nil, fields: nil, index: nil, identifiers: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil) ⇒ Object
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 |
# File 'lib/scout/tsv/attach.rb', line 45

# Adds fields from +other+ to the entries of +source+, pairing entries through
# match_key (in source) and other_key (in other). When source cannot identify
# the other key field and no +index+ is given, a translation index is built
# from the available identifier files.
#
# NOTE(review): identifiers that were missing from the extracted listing
# (`source.options`, `log_message`, and `bar` on the right-hand side) were
# restored from context; confirm against lib/scout/tsv/attach.rb.
#
# @param source [TSV, TSV::Parser, Object] wrapped in a TSV::Transformer unless
#   it already is a TSV or TSV::Parser.
# @param other [TSV, TSV::Parser, Object] wrapped in a TSV::Parser and then
#   opened as a TSV unless it already is a TSV.
# @param target [:stream, nil, Object] dumper used when source is a Transformer.
# @param fields [String, Array, nil] fields to attach; defaults to all fields
#   of other except the two key fields.
# @param index [Object, nil] key translation index (must respond to
#   #chunked_values_at).
# @param complete [Boolean] when true and matching on the key, entries found
#   only in other are added to source.
# @param insitu [Boolean, nil] modify the source values in place; defaults to
#   true for a TSV source, always false for :single sources.
# @param bar [true, Object, nil] progress bar setting; `true` is replaced by
#   the log message.
# @return [Object] the (possibly wrapped) source.
def self.attach(source, other, target: nil, fields: nil, index: nil, identifiers: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil)
  source = TSV::Transformer.new source unless TSV === source || TSV::Parser === source
  other = TSV::Parser.new other unless TSV === other || TSV::Parser === other

  fields = [fields] if String === fields

  match_key, other_key = TSV.match_keys(source, other, match_key: match_key, other_key: other_key)

  if ! (TSV === other)
    other_key_name = other_key == :key ? other.key_field : other.fields[other_key]
    other = TSV.open other, key_field: other_key_name, fields: fields, one2one: true, persist: persist_input
    other_key = :key if other.key_field == source.key_field
  end

  if TSV::Transformer === source
    source.dumper = case target
                    when :stream
                      TSV::Dumper.new(source.options.merge(sep: "\t"))
                    when nil
                      TSV.setup({}, **source.options.dup)
                    else
                      target
                    end
  end

  other.with_unnamed do
    source.with_unnamed do
      other_key_name = other_key == :key ? other.key_field : other_key
      other_key_name = other.fields[other_key_name] if Integer === other_key
      fields = other.all_fields - [other_key_name, source.key_field] if fields.nil?

      # NOTE(review): the else branch reads match_key_name itself (nil at this
      # point); `match_key` may have been intended. Left unchanged.
      match_key_name = match_key == :key ? source.key_field : match_key_name

      if index.nil? && ! source.identify_field(other_key_name)
        identifier_files = []
        identifier_files << identifiers if identifiers
        identifier_files << source
        identifier_files << TSV.identifier_files(source)
        identifier_files << TSV.identifier_files(other)
        identifier_files << other

        index = TSV.translation_index(identifier_files.flatten, match_key_name, other_key_name)
      end

      if other_key != :key
        other = other.reorder other_key, fields, one2one: one2one, merge: true, type: :double
      end

      # identify_field receives a copy of the field list
      other_field_positions = other.identify_field(fields.dup)
      fields.zip(other_field_positions) do |o,n|
        raise "Field #{o} not found. Options: #{Log.fingerprint other.fields}" if n.nil?
      end

      log_message = "Attach #{Log.fingerprint fields - source.fields} to #{Log.fingerprint source} (#{[match_key, other_key] * "=~"})"
      Log.debug log_message
      bar = log_message if TrueClass === bar

      source.fields = (source.fields + fields).uniq

      # positions in source where each attached field is written
      overlaps = source.identify_field(fields)

      type = source.type == :single ? :list : source.type

      empty_other_values = case type
                           when :list
                             [nil] * other.fields.length
                           when :flat
                             []
                           when :double
                             [[]] * other.fields.length
                           end

      empty_other_values = nil if other.type == :single

      insitu = TSV === source ? true : false if insitu.nil?
      insitu = false if source.type == :single

      match_key_pos = source.identify_field(match_key)
      source.traverse bar: bar, unnamed: true do |orig_key,current_values|
        current_values = [current_values] if source.type == :single

        keys = (match_key == :key || match_key_pos == :key) ? [orig_key] : current_values[match_key_pos]
        keys = [keys].compact unless Array === keys

        keys = index.chunked_values_at(keys).flatten if index

        current_values = current_values.dup unless insitu
        keys = [nil] if keys.empty?
        keys.each do |current_key|
          other_values = current_key.nil? ? empty_other_values : other[current_key]

          # bring the values of other into the shape used by source
          if other_values.nil?
            other_values = empty_other_values
          elsif other.type == :flat
            other_values = [other_values]
          elsif other.type == :list && source.type == :double
            other_values = other_values.collect{|v| [v] }
          elsif other.type == :double && source.type == :list
            other_values = other_values.collect{|v| v.first }
          end

          other_values = other_field_positions.collect do |pos|
            if pos == :key
              current_key
            else
              other.type == :single ? other_values : other_values[pos]
            end
          end

          other_values.zip(overlaps).each do |v,overlap|
            if type == :list
              # only fill positions that are still nil or an empty string
              current_values[overlap] = v if current_values[overlap].nil? || (String === current_values[overlap] && current_values[overlap].empty?)
            elsif type == :flat
              next if v.nil?
              v = [v] unless Array === v
              current_values.concat v
            else
              current_values[overlap] ||= []
              next if v.nil?
              v = [v] unless Array === v
              current_values[overlap].concat(v - current_values[overlap])
            end
          end
        end
        source[orig_key] = current_values unless insitu
        nil
      end

      if complete && match_key == :key
        other.each do |other_key,other_values|
          next if source.include?(other_key)
          if other.type == :flat
            other_values = [other_values]
          elsif other.type == :single
            other_values = [other_values]
          elsif other.type == :list && type == :double
            other_values = other_values.collect{|v| [v] }
          elsif other.type == :double && type == :list
            other_values = other_values.collect{|v| v.first }
          end

          new_values = case type
                       when :list
                         [nil] * source.fields.length
                       when :flat
                         []
                       when :double
                         source.fields.length.times.collect{ [] }
                       end

          other_values.zip(overlaps).each do |v,overlap|
            next if v.nil?
            if overlap == :key
              # NOTE(review): the two ternary branches look swapped; left unchanged.
              other_key = Array === v ? v : v.first
            elsif type == :list
              new_values[overlap] = v if new_values[overlap].nil? || (String === new_values[overlap] && new_values[overlap].empty?)
            else
              v = [v] unless Array === v
              new_values[overlap].concat v
            end
          end
          source[other_key] = new_values
        end
      end
      source.type = type
    end
  end

  source
end
.cast_value(value, cast) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/scout/tsv/parser.rb', line 13

# Applies +cast+ to +value+, recursing into (nested) arrays.
#
# @param value [Object, Array, nil] value or array of values to convert.
# @param cast [Proc, Symbol, String] a Proc is called with every non-array
#   value (nil and "" included); otherwise it names a public method that is
#   invoked on the value.
# @return [Object, Array, nil] converted value; nil and "" become nil when
#   +cast+ is a method name.
# @raise [NoMethodError] when +cast+ names a private or missing method.
def self.cast_value(value, cast)
  return value.collect{|e| cast_value(e, cast) } if Array === value
  return cast.call(value) if Proc === cast
  return nil if value.nil? || value == ""

  # public_send instead of send: the cast name can come from a file header,
  # so it must not be able to reach private methods such as Kernel#exit
  value.public_send(cast)
end
.change_id(source, source_id, new_id, identifiers: nil, one2one: false, insitu: false) ⇒ Object
33 34 35 36 37 38 39 40 41 |
# File 'lib/scout/tsv/change_id.rb', line 33

# Replaces the field +source_id+ of +source+ by +new_id+: attaches +new_id+
# from the identifiers and slices the result so that +new_id+ takes the
# position of +source_id+.
#
# String sources are wrapped in a TSV::Parser. When +identifiers+ is nil,
# source.identifiers is used. +one2one+ is accepted but not used here.
def self.change_id(source, source_id, new_id, identifiers: nil, one2one: false, insitu: false)
  source = TSV::Parser.new source if String === source

  identifiers = source.identifiers if identifiers.nil?

  new_fields = source.fields.dup
  position = new_fields.index(source_id)
  new_fields[position] = new_id

  attached = source.attach(identifiers, fields: [new_id], insitu: insitu)
  attached.slice(new_fields)
end
.change_key(source, new_key_field, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_identifiers: nil) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/scout/tsv/change_id.rb', line 4

# Re-keys +source+ by +new_key_field+.
#
# When identifiers are available and +new_key_field+ is not strictly one of
# the source fields, the field is first attached from the identifiers (the
# last identifier file that knows the field when several are given) and the
# re-keying is delegated to the attached result. Otherwise a TSV::Transformer
# traverses the source with the new key.
#
# @param keep [Boolean] keep the old key field as the first field.
# @param stream [Boolean] return the transformer instead of a loaded TSV.
def self.change_key(source, new_key_field, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_identifiers: nil)
  source = TSV::Parser.new source if String === source
  identifiers = source.identifiers if identifiers.nil? && source.respond_to?(:identifiers)

  if identifiers && source.identify_field(new_key_field, strict: true).nil?
    if Array === identifiers
      identifiers = identifiers.select{|f| f.identify_field(new_key_field) }.last
    end

    attached = source.attach(identifiers, fields: [new_key_field], insitu: false, one2one: true, persist_input: persist_identifiers)
    return attached.change_key(new_key_field, keep: keep, stream: stream, one2one: one2one, merge: merge)
  end

  fields = source.fields.dup - [new_key_field]
  fields.unshift source.key_field if keep

  transformer = TSV::Transformer.new source
  transformer.key_field = new_key_field
  transformer.fields = fields
  transformer.traverse key_field: new_key_field, fields: fields, one2one: one2one, unnamed: true do |k,v|
    [k, v]
  end

  return transformer if stream

  transformer.tsv(merge: merge, one2one: one2one)
end
.collapse_stream(stream, *args, **kwargs, &block) ⇒ Object
221 222 223 224 225 226 227 |
# File 'lib/scout/tsv/open.rb', line 221

# Runs +stream+ through process_stream and, inside its block, pipes the result
# of Open.collapse_stream into the sink. Objects responding to #stream are
# unwrapped first. Extra positional/keyword arguments and the block are
# accepted but not used by this method.
def self.collapse_stream(stream, *args, **kwargs, &block)
  stream = stream.stream if stream.respond_to?(:stream)

  process_stream(stream) do |sin, line|
    Open.consume_stream(Open.collapse_stream(stream, line: line), false, sin)
  end
end
.concat_streams(streams) ⇒ Object
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 |
# File 'lib/scout/tsv/stream.rb', line 219

# Concatenates several TSV streams into a single pipe.
#
# The leading '#' lines of the first stream, plus its first non-'#' line, are
# copied verbatim; afterwards one line is read from every stream in turn and
# lines starting with '#' are dropped. Streams are removed once they hit EOF.
#
# @param streams [Array] Step, Path, TSV::Dumper and TSV objects are converted
#   to their streams; anything else is used as is. nil entries are ignored.
# @return [Object] whatever Open.open_pipe returns.
def self.concat_streams(streams)
  streams = streams.collect do |stream|
    case stream
    when(defined? Step and Step)
      stream.stream
    when Path
      stream.open
    when TSV::Dumper
      stream.stream
    when TSV
      stream.dumper_stream
    else
      stream
    end
  end.compact

  Open.open_pipe do |sin|
    first_stream = streams.first
    # first_stream is nil when no streams were given
    while first_stream && (line = first_stream.gets)
      sin.write line
      break unless line[0] == "#"
    end

    while streams.any?
      streams.each do |stream|
        line = stream.gets
        # an exhausted stream returns nil; it is dropped by delete_if below
        next if line.nil?
        sin.write line unless line[0] == "#"
      end
      streams.delete_if{|stream| stream.eof? }
    end
  end
end
.csv(obj, options = {}) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/scout/tsv/csv.rb', line 4

# Builds a TSV from CSV content.
#
# NOTE(review): the `options` identifier was missing throughout the extracted
# listing and has been restored; confirm against lib/scout/tsv/csv.rb.
#
# @param obj [Path, String, Object] a Path, a remote location, a file name, or
#   CSV content / IO handed to CSV.new.
# @param options [Hash] :headers (default true), :type (default :list), :cast,
#   :merge, :key_field and :fields are consumed here; the rest is forwarded to
#   CSV. When :key_field or :fields is given the data is loaded as :double,
#   reordered and converted back to the requested type.
# @return [Object] the TSV built from the rows.
def self.csv(obj, options = {})
  options = IndiferentHash.add_defaults options, :headers => true, :type => :list
  headers = options[:headers]

  noheaders = ! headers

  type = options.delete :type
  cast = options.delete :cast
  options.delete :merge # consumed so it is not forwarded to CSV
  key_field = options.delete :key_field
  fields = options.delete :fields

  if key_field || fields
    orig_type = type
    type = :double
  end

  # the header row is consumed manually below
  options[:headers] = false

  csv = case obj
        when Path
          CSV.read obj.find.open, **options
        when String
          if Open.remote?(obj)
            CSV.read Open.open(obj), **options
          elsif Path.is_filename?(obj)
            CSV.read obj, **options
          else
            CSV.new obj, **options
          end
        else
          CSV.new obj, **options
        end

  tsv = if noheaders
          TSV.setup({}, :key_field => nil, :fields => nil, :type => type)
        else
          key, *csv_fields = csv.shift
          TSV.setup({}, :key_field => key, :fields => csv_fields, :type => type)
        end

  csv.each_with_index do |row,i|
    if noheaders
      key, values = ["row-#{i}", row]
    else
      key, *values = row
    end

    if cast
      values = values.collect{|v| v.send cast }
    end

    case type
    when :double, :flat
      tsv.zip_new(key, values)
    when :single
      tsv[key] = values.first
    when :list
      tsv[key] = values
    end
  end

  if key_field || fields
    tsv = tsv.reorder(key_field, fields, :one2one => true, :merge => true)
    if tsv.type != orig_type
      tsv = case orig_type
            when :list
              tsv.to_list
            when :single
              tsv.to_single
            when :flat
              tsv.to_flat
            end
    end
  end

  tsv
end
.field_match_counts(file, values, options = {}) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/scout/tsv/util.rb', line 13

# Counts, per field of +file+, how many of +values+ appear in it.
#
# A persisted text file of "value<TAB>field" lines is produced from the TSV,
# and a shell pipeline (sed/grep/sort/cut/uniq) counts the whole-word matches
# of +values+ per field.
#
# NOTE(review): `options` and the variable receiving the :persist keys were
# missing from the extracted listing; the name `persist_options` is a
# reconstruction. Confirm against lib/scout/tsv/util.rb.
#
# @param file [TSV, Object] a TSV or something TSV.open accepts.
# @param values [Array<String>] values to look for.
# @param options [Hash] options for TSV.open; keys pulled with the :persist
#   prefix are passed to Persist.persist.
# @return [Object] a :single TSV cast with :to_i; an empty one when the
#   pipeline fails (the exception is logged).
def self.field_match_counts(file, values, options = {})
  options = IndiferentHash.add_defaults options, :persist_prefix => "Field_Matches"
  persist_options = IndiferentHash.pull_keys options, :persist

  filename = TSV === file ? file.filename : file
  path = Persist.persist filename, :string, persist_options.merge(:no_load => true) do
    tsv = TSV === file ? file : TSV.open(file, options)

    text = ""
    fields = nil
    tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
      names.zip(fields).each do |list, format|
        list = [list] unless Array === list
        list.delete_if do |name| name.empty? end
        next if list.empty?
        text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
      end
      text << [gene, tsv.key_field] * "\t" << "\n"
    end
    text
  end

  path = path.find if Path === path
  TmpFile.with_file(values.uniq * "\n", false) do |value_file|
    cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' | sed 's/HEADERNOMATCH//' |sort -u|cut -f 2 |sort|uniq -c|sed 's/^ *//;s/ /\t/'"
    begin
      TSV.open(CMD.cmd(cmd), :key_field => 1, :fields => [0], :type => :single, :cast => :to_i)
    rescue
      Log.exception $!
      TSV.setup({}, :type => :single, :cast => :to_i)
    end
  end
end
.identifier_files(obj) ⇒ Object
255 256 257 258 259 260 261 262 263 264 265 266 267 268 |
# File 'lib/scout/tsv/attach.rb', line 255

# Finds the identifier files associated with +obj+.
#
# NOTE(review): the method called on TSV in the fallback branch was missing
# from the extracted listing; `parse_options` is a reconstruction based on the
# `[:identifiers]` lookup. Confirm against lib/scout/tsv/attach.rb.
#
# @param obj [TSV, Path, String, Object]
# @return [Object, Array, nil] obj.identifier_files for a TSV; for a file
#   name, the `identifiers` file next to it when it exists, otherwise a
#   one-element array with the :identifiers header option; nil for anything
#   else.
def self.identifier_files(obj)
  if TSV === obj
    obj.identifier_files
  elsif Path.is_filename?(obj)
    # use the Path consistently instead of the raw argument
    path = Path === obj ? obj : Path.setup(obj)
    if path.dirname.identifiers.exists?
      path.dirname.identifiers
    else
      [TSV.parse_options(path)[:identifiers]]
    end
  else
    nil
  end
end
.identify_field(key_field, fields, name, strict: nil) ⇒ Object
47 48 49 50 51 |
# File 'lib/scout/tsv/util.rb', line 47

# Resolves +name+ against a key field and a list of fields.
#
# @param key_field [Object] name of the key field.
# @param fields [Array] field names.
# @param name [Object, Array] field reference, or an array of them. The array
#   passed by the caller is not modified.
# @param strict [Boolean, nil] when truthy, a single name is not matched
#   against the key field through NamedArray.field_match.
# @return [Object] :key when the name designates the key field, otherwise the
#   result of NamedArray.identify_name.
def self.identify_field(key_field, fields, name, strict: nil)
  return :key if name == :key || (! strict && NamedArray.field_match(key_field, name))

  # collect instead of collect!: do not rewrite the caller's array
  name = name.collect{|n| NamedArray.field_match(key_field, n) ? :key : n } if Array === name

  NamedArray.identify_name(fields, name, strict: strict)
end
.identify_field_in_obj(obj, field) ⇒ Object
3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
# File 'lib/scout/tsv/change_id/translate.rb', line 3

# Resolves +field+ in different kinds of objects: a TSV, a parser/dumper, a
# file (through the "all_fields" of its header), or an array whose first
# element is the key field. Returns nil for any other kind of object.
def self.identify_field_in_obj(obj, field)
  return obj.identify_field(field) if TSV === obj

  if TSV::Parser === obj || TSV::Dumper === obj
    return TSV.identify_field(obj.key_field, obj.fields, field)
  end

  if Path === obj || String === obj
    header_fields = TSV.parse_header(obj)["all_fields"]
    return identify_field_in_obj(header_fields, field)
  end

  return nil unless Array === obj

  key_field, *fields = obj
  TSV.identify_field(key_field, fields, field)
end
.incidence(tsv, **kwargs) ⇒ Object
224 225 226 |
# File 'lib/scout/association/item.rb', line 224

# Passes the keys of Association.index(tsv, **kwargs) to
# AssociationItem.incidence and returns its result.
def self.incidence(tsv, **kwargs)
  pair_keys = Association.index(tsv, **kwargs).keys
  AssociationItem.incidence(pair_keys)
end
.index(tsv_file, target: :key, fields: nil, order: true, bar: nil, **kwargs) ⇒ Object
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
# File 'lib/scout/tsv/index.rb', line 40

# Builds a :single index mapping the values found in +fields+ of +tsv_file+
# to the corresponding value of +target+.
#
# NOTE(review): several identifiers were missing from the extracted listing
# (`IndiferentHash.process_options`, `persist_options`, `data_options`, and
# `bar` on the right-hand side) and have been restored from context; confirm
# against lib/scout/tsv/index.rb.
#
# @param tsv_file [Object] anything Open.traverse accepts.
# @param target [Object] field that becomes the value of the index.
# @param fields [Array, Object, nil] source fields; nil means :all.
# @param order [Boolean] when true, candidates are collected per source
#   position and the first one wins; when false the first target seen for a
#   value is kept.
# @param bar [true, Object, nil] progress bar setting; `true` is replaced by
#   the log message.
# @param kwargs [Hash] :engine, keys with the :persist and :data prefixes are
#   extracted; the rest is forwarded to Open.traverse.
# @return [Object] the result of Persist.persist for the index.
def self.index(tsv_file, target: :key, fields: nil, order: true, bar: nil, **kwargs)
  kwargs = IndiferentHash.add_defaults kwargs, unnamed: true
  engine = IndiferentHash.process_options kwargs, :engine

  fields = :all if fields.nil?

  prefix = case fields
           when :all
             "Index[#{target}]"
           else
             "Index[#{Array === fields ? fields * "," : fields}->#{target}]"
           end

  prefix += select_prefix_str(kwargs[:select])

  persist_options = IndiferentHash.pull_keys kwargs, :persist
  persist_options = IndiferentHash.add_defaults persist_options, :prefix => prefix, :engine => :HDB, :persist => false

  data_options = IndiferentHash.pull_keys kwargs, :data

  Persist.persist(tsv_file, persist_options[:engine], persist_options.merge(other_options: kwargs.merge(target: target, fields: fields, order: order, data_options: data_options))) do |filename|
    if filename
      index = ScoutCabinet.open(filename, true, engine)
      TSV.setup(index, :type => :single)
      index.extend TSVAdapter
    else
      index = TSV.setup({}, :type => :single)
    end

    log_msg = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}"
    Log.low log_msg
    bar = log_msg if TrueClass === bar

    if order
      tmp_index = {}
      include_self = fields == :all || (Array === fields) && fields.include?(target)
      target_key_field, source_field_names = Open.traverse tsv_file, type: :double, key_field: target, fields: fields, bar: bar, **kwargs do |k,values|
        # slot 0 is reserved for the target value itself
        tmp_index[k] ||= [[k]] if include_self
        values.each_with_index do |list,i|
          i += 1 if include_self
          list.each do |e|
            tmp_index[e] ||= []
            tmp_index[e][i] ||= []
            tmp_index[e][i] << k
          end
        end
      end

      # the earliest slot that has a candidate provides the indexed value
      tmp_index.each do |e,list|
        index[e] = list.flatten.compact.uniq.first
      end

      index.key_field = source_field_names * ","
      index.fields = [target_key_field]

      tmp_index = {}
    else
      target_key_field, source_field_names = Open.traverse tsv_file, key_field: target, fields: fields, type: :flat, unnamed: true, bar: bar, **kwargs do |k,values|
        values.each do |e|
          index[e] = k unless index.include?(e)
        end
      end
    end

    index.key_field = source_field_names * ","
    index.fields = [target_key_field]

    index
  end
end
.match_keys(source, other, match_key: nil, other_key: nil) ⇒ Object
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/scout/tsv/attach.rb', line 3

# Chooses the pair of fields used to match +source+ entries with +other+
# entries.
#
# When +match_key+ is not given it is searched in this order: a field name
# shared by both, a source field that other can identify, a field of other
# that source can identify, and finally the source key field. +other_key+
# falls back to what other identifies for +match_key+, then to its key field.
# Either element is returned as :key when it matches the respective key field.
#
# @return [Array] [match_key, other_key]
def self.match_keys(source, other, match_key: nil, other_key: nil)
  #match_key = (source.all_fields & other.all_fields).first if match_key.nil?
  if match_key.nil?
    shared_pos = NamedArray.identify_name(source.all_fields, other.all_fields).first
    match_key = source.all_fields[shared_pos] if shared_pos
  end

  if match_key.nil?
    source.all_fields.each do |field|
      other_key = other.identify_field(field)
      next unless other_key

      other_key = other.key_field if other_key == :key
      match_key = field
      break
    end
  end

  if match_key.nil?
    other.all_fields.each do |field|
      match_key = source.identify_field(field)
      next unless match_key

      other_key = field
      break
    end
  end

  match_key = source.key_field if match_key.nil?

  other_key = other.identify_field(match_key) if other_key.nil?
  other_key = other.key_field if other_key.nil?

  match_key = :key if NamedArray.field_match(match_key, source.key_field)
  other_key = :key if NamedArray.field_match(other_key, other.key_field)

  [match_key, other_key]
end
.open(file, options = {}) ⇒ Object
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# TSV.open(file, options = {}) -- opens +file+ and parses it, going through
# Persist.tsv.
#
# What the listing below shows: a set of keys (:grep, :invert_grep,
# :fixed_grep, :nocache, :monitor, :entity_options, :unnamed, :field) is
# extracted through IndiferentHash; a String containing a newline that is not
# a Path is wrapped in a StringIO; inside the Persist.tsv block the content is
# parsed with TSV.parse, directly for a TSV::Parser or through Open.open
# otherwise; identifiers are taken from the filename's identifier_file_path
# when that exists; `unnamed` is applied when it is not nil.
#
# NOTE(review): this listing is extraction-damaged. Several identifiers are
# missing (the second parameter name, the receivers of `[:fields]`, `[:data]`,
# `.delete(:update)`, the method called on IndiferentHash, the attribute in
# `tsv. =`). They look like `options` / `persist_options` / `entity_options`,
# but some spots are ambiguous; restore them from lib/scout/tsv.rb rather than
# from this text.
# File 'lib/scout/tsv.rb', line 75 def self.open(file, = {}) grep, invert_grep, fixed_grep, nocache, monitor, , unnamed, field = IndiferentHash. , :grep, :invert_grep, :fixed_grep, :nocache, :monitor, :entity_options, :unnamed, :field if field and [:field].nil? [:fields] = [field] [:type] ||= :single end = IndiferentHash.pull_keys , :persist = IndiferentHash.add_defaults , prefix: "TSV", type: :HDB, persist: false [:data] ||= [:data] [:update] = .delete(:update) if .include?(:update) file = StringIO.new file if String === file && ! (Path === file) && file.index("\n") source_name, = case file when StringIO [file.inspect, ] when TSV::Parser [file.[:filename], file.] else [file, ] end Persist.tsv(source_name, , persist_options: ) do |data| [:data] = data if data [:filename] ||= if TSV::Parser === file file.[:filename] elsif Path === file file elsif file.respond_to?(:filename) file.filename elsif Path.is_filename?(file) file else nil end if data Log.debug "TSV open #{Log.fingerprint file} into #{Log.fingerprint data}" else Log.debug "TSV open #{Log.fingerprint file}" end tsv = if TSV::Parser === file TSV.parse(file, **) else [:tsv_invert_grep] ||= invert_grep if invert_grep Open.open(file, grep: grep, invert_grep: invert_grep, fixed_grep: fixed_grep, nocache: nocache) do |f| TSV.parse(f, **) end end if tsv.identifiers.nil? and Path === tsv.filename and tsv.filename.identifier_file_path tsv.identifiers = tsv.filename.identifier_file_path.find if tsv.filename.identifier_file_path.exists? end tsv.unnamed = unnamed unless unnamed.nil? tsv. = tsv end end |
.original_setup ⇒ Object
34 |
# Makes the `setup` method that exists at this point also reachable as
# `original_setup`.
# NOTE(review): presumably `setup` is redefined afterwards and the new version
# delegates to this alias -- confirm in lib/scout/tsv.rb.
# File 'lib/scout/tsv.rb', line 34 alias original_setup setup |
.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: nil, serializer: nil, **kwargs, &block) ⇒ Object
470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 |
# File 'lib/scout/tsv/parser.rb', line 470

# Parses +stream+ into a TSV data object and annotates the result.
#
# NOTE(review): the `options` reader on the parser was missing from the
# extracted listing (`parser.[:cast]`) and has been restored; confirm against
# lib/scout/tsv/parser.rb.
#
# @param stream [TSV::Parser, Object] an existing parser is used as is,
#   anything else is wrapped in TSV::Parser.new with fix/header_hash/sep.
# @param filename [Object, nil] overrides the parser's :filename option.
# @param namespace [Object, nil] overrides the parser's :namespace option.
# @param unnamed [Object, nil] assigned to the result's `unnamed`.
# @param serializer [Object, nil] serializer for persistent data; when nil it
#   is derived from cast and type.
# @param kwargs [Hash] :cast, :type, :data and :identifiers are read here; all
#   but :identifiers are forwarded to parser.traverse. Note that :type and
#   :data are written back into kwargs.
# @return [Object] the object returned by parser.traverse.
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: nil, serializer: nil, **kwargs, &block)
  parser = TSV::Parser === stream ? stream : TSV::Parser.new(stream, fix: fix, header_hash: header_hash, sep: sep)

  cast = kwargs[:cast]
  cast = parser.options[:cast] if cast.nil?
  identifiers = kwargs.delete(:identifiers)
  # also stores the resolved type back into kwargs and the parser options
  type = kwargs[:type] ||= parser.options[:type] ||= :double

  if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
    TSV.setup(data, type: type)
    data.extend TSVAdapter
    serializer ||= if cast
                     case [cast, type]
                     when [:to_i, :single]
                       :integer
                     when [:to_i, :list], [:to_i, :flat]
                       :integer_array
                     when [:to_f, :single]
                       :float
                     when [:to_f, :list], [:to_f, :flat]
                       :float_array
                     when [:to_f, :double], [:to_i, :double]
                       :marshal
                     else
                       type
                     end
                   else
                     type
                   end
    data.serializer = TSVAdapter::SERIALIZER_ALIAS[serializer] || serializer
  end

  kwargs[:data] = {} if kwargs[:data].nil?

  data = parser.traverse(**kwargs, &block)
  data.type = type
  data.cast = cast
  # only fill annotations that are not already present on the data
  data.filename = filename || parser.options[:filename] if data.filename.nil?
  data.namespace = namespace || parser.options[:namespace] if data.namespace.nil?
  data.identifiers = identifiers || parser.options[:identifiers] if data.identifiers.nil?
  data.unnamed = unnamed
  data.save_annotation_hash if data.respond_to?(:save_annotation_hash)
  data
end
.parse_header(stream, fix: true, header_hash: '#', sep: "\t") ⇒ Object
257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 |
# File 'lib/scout/tsv/parser.rb', line 257

# Reads the header of a TSV stream.
#
# @param stream [Path, String, IO, Object] a Path or file name is opened with
#   Open.open; anything else must respond to #gets.
# @param fix [Boolean, Proc] when truthy lines are chomped and passed through
#   Misc.fixutf8; a Proc is applied to the line following an options line.
# @param header_hash [String, true] literal prefix that marks header lines;
#   `true` or "" treats the first line as the field line.
# @param sep [String, nil] field separator (nil means tab); overridden by a
#   :sep entry in the options line.
# @return [Object] {} for an empty stream; otherwise the result of
#   NamedArray.setup over [options, key_field, fields, first_line, preamble,
#   all_fields, namespace].
# @raise [RuntimeError] "Closed stream" for a closed IO. Any exception raised
#   while reading is re-raised after notifying the stream (stream_exception /
#   abort) when it supports that.
def self.parse_header(stream, fix: true, header_hash: '#', sep: "\t")
  sep = "\t" if sep.nil?

  if (Path === stream) || ((String === stream) && Path.is_filename?(stream))
    Open.open(stream) do |f|
      return parse_header(f, fix: fix, header_hash: header_hash, sep: sep)
    end
  end

  if IO === stream && stream.closed?
    stream.join if stream.respond_to?(:join)
    raise "Closed stream"
  end

  opts = {}
  preamble = []

  # Get line

  begin

    #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
    line = stream.gets
    return {} if line.nil?
    line = Misc.fixutf8 line.chomp if fix

    # Process options line
    # header_hash is a literal prefix (see start_with?/sub below), so it is
    # escaped before being placed in the pattern
    if line and (String === header_hash && m = line.match(/^#{Regexp.escape(header_hash)}: (.*)/))
      opts = IndiferentHash.string2hash m.captures.first.chomp
      line = stream.gets
      if line && fix
        if Proc === fix
          line = fix.call line
        else
          line = Misc.fixutf8 line.chomp if line && fix
        end
      end
    end

    # Determine separator
    sep = opts[:sep] if opts[:sep]

    # Process fields line
    preamble << line if line
    while line && (TrueClass === header_hash || (String === header_hash && line.start_with?(header_hash)))
      fields = line.split(sep, -1)
      key_field = fields.shift
      key_field = key_field.sub(header_hash, '') if String === header_hash && ! header_hash.empty?

      line = (header_hash != "" ? stream.gets : nil)
      line = Misc.fixutf8 line.chomp if line
      preamble << line if line
      break if TrueClass === header_hash || header_hash == ""
    end

    # drop the last two collected lines; what remains is the preamble
    preamble = preamble[0..-3] * "\n"

    line ||= stream.gets

    first_line = line

    opts[:type] = opts[:type].to_sym if opts[:type]
    opts[:cast] = opts[:cast].to_sym if opts[:cast]

    all_fields = [key_field] + fields if key_field && fields
    namespace = opts[:namespace]
    NamedArray.setup([opts, key_field, fields, first_line, preamble, all_fields, namespace], %w(options key_field fields first_line preamble all_fields namespace))
  rescue Exception
    raise stream.stream_exception if stream.respond_to?(:stream_exception) && stream.stream_exception
    stream.abort($!) if stream.respond_to?(:abort)
    raise $!
  end
end
.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil, select: nil, field_names: nil) ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/scout/tsv/parser.rb', line 29

# Splits one line into its key and values.
#
# @param line [String] a line without the trailing newline.
# @param type [Symbol] :list, :single, :flat or :double; decides the shape of
#   the returned values.
# @param key [Integer, String] position of the key, or its name (requires
#   +field_names+).
# @param positions [Array<Integer>, nil] positions of the values to keep; nil
#   keeps every value except the key.
# @param sep [String] field separator.
# @param sep2 [String] separator of multiple values inside a field.
# @param cast [Proc, Symbol, nil] conversion applied through cast_value.
# @param select [Object, nil] filter passed to TSV.select; lines that do not
#   pass yield nil.
# @param field_names [Array, nil] field names used to find a key given by name.
# @return [Array, nil] [key, values], or nil when the line is filtered out.
# @raise [RuntimeError] when the key is given by name and cannot be resolved.
def self.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil, select: nil, field_names: nil)
  items = line.split(sep, -1)

  return nil if select && ! TSV.select(items[0], items[1..-1], select, fields: field_names, type: type, sep: sep2)

  if String === key
    raise "Key by name, but no field names" if field_names.nil?
    # keep the name until the lookup succeeded so the message can show it
    key_pos = field_names.index key
    raise "Key #{key} not found in field names #{Log.fingerprint field_names}" if key_pos.nil?
    key = key_pos
  end

  if positions.nil? && key == 0
    key = items.shift
  elsif positions.nil?
    if type == :flat
      key = items[1..-1].collect{|e| e.split(sep2, -1) }.flatten
      items = items.slice(0,1)
    else
      key = items.delete_at(key)
    end
    key = key.split(sep2) if type == :double
  else
    key, items = items[key], items.values_at(*positions)
    key = key.split(sep2) if type == :double || type == :flat
  end

  items = case type
          when :list
            items
          when :single
            items.first
          when :flat
            # positions beyond the end of the line yield nil cells
            items.collect{|i| i.nil? ? [] : i.split(sep2, -1) }.flatten
          when :double
            items.collect{|i| i.nil? ? [] : i.split(sep2, -1) }
          end

  items = cast_value(items, cast) if cast

  [key, items]
end
.parse_options ⇒ Object
329 330 331 |
# File 'lib/scout/tsv/parser.rb', line 329

# Returns only the :options entry of the parsed header. All arguments are
# forwarded unchanged to parse_header.
def self.parse_options(...)
  parse_header(...)[:options]
end
.parse_stream(stream, data: nil, source_type: nil, sep: "\t", type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, head: nil, **kwargs, &block) ⇒ Object
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 |
# Reads +stream+ line by line (starting with +first_line+ when given),
# parses each line with parse_line and either stores the result in +data+
# (a new Hash when +data+ is nil) or hands it to the block.
#
# - When +data+ responds to load_stream, its serializer name includes
#   "String", source and target types match, the separator is a tab and no
#   head/cast/positions/non-zero key/Proc fix is requested, the stream is
#   loaded directly into +data+ and returned.
# - type :array / :line yields the raw line to the block; :matrix yields the
#   line split on +sep+.
# - Values are converted from +source_type+ to +type+ ("list_double", ...).
# - +merge+ is forced to false unless type is :double or :flat; with
#   merge == :concat existing values are extended in place.
# - +head+ limits the number of lines consumed.
# - Errors abort the stream (when it responds to abort) and are re-raised;
#   the final ensure joins the stream once it reports eof.
#
# NOTE(review): this listing has lost some identifiers (for example the
# left-hand side of `= Log::ProgressBar.(stream, )` and the receivers of
# `.init`, `.tick` and `.remove`), presumably the progress bar built from
# the +bar+ argument; confirm against lib/scout/tsv/parser.rb.
# File 'lib/scout/tsv/parser.rb', line 74 def self.parse_stream(stream, data: nil, source_type: nil, sep: "\t", type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, head: nil, **kwargs, &block) begin = "Parsing #{Log.fingerprint stream}" if TrueClass === = Log::ProgressBar.(stream, ) if .init if source_type = type if source_type.nil? type_swap_key = [source_type.to_s, type.to_s] * "_" same_type = source_type.to_s == type.to_s if data && data.respond_to?(:load_stream) && data.serializer.to_s.include?("String") && same_type && ! (head || kwargs[:cast] || kwargs[:positions] || (kwargs[:key] && kwargs[:key] != 0) || Proc === fix ) && (sep.nil? || sep == "\t") Log.debug "Loading #{Log.fingerprint stream} directly into #{Log.fingerprint data}" if first_line full_stream = Open.open_pipe do |sin| sin.puts first_line Open.consume_stream(stream, false, sin) end data.load_stream(full_stream) else data.load_stream(stream) end return data end data = {} if data.nil? merge = false if type != :double && type != :flat line = first_line || stream.gets while line break if head && head <= 0 begin line.chomp! if Proc === fix line = fix.call line break if (FalseClass === line) || :break == line next if line.nil? elsif fix line = Misc.fixutf8(line) end .tick if if type == :array || type == :line block.call line next elsif type == :matrix parts = line.split(sep) block.call parts next end key, items = parse_line(line, type: source_type, field_names: field_names, sep: sep, **kwargs) next if key.nil? 
if Array === key keys = key if one2one key_items = keys.length.times.collect{|i| items.collect{|list| [list[i] || list[0]] } } else key_items = false end else keys = [key] key_items = false end keys.each_with_index do |key,i| if key_items these_items = key_items[i] else these_items = items end these_items = case type_swap_key when "single_single" these_items when "list_single" these_items.first when "flat_single" these_items.first when "double_single" these_items.first.first when "single_list" [these_items] when "list_list" these_items when "flat_list" these_items when "double_list" these_items.collect{|l| l.first } when "single_flat" [these_items] when "list_flat" these_items when "flat_flat" these_items when "double_flat" these_items.flatten when "single_double" [[these_items]] when "list_double" these_items.collect{|l| l.nil? ? [] : [l] } when "flat_double" [these_items] when "double_double" these_items end if block_given? res = block.call(key, these_items, field_names) data[key] = res unless res.nil? || FalseClass === data next end if ! merge || ! data.include?(key) these_items = these_items.collect{|i| i.empty? ? [nil] : i } if type == :double && one2one data[key] = these_items elsif type == :double current = data[key] if merge == :concat these_items.each_with_index do |new,i| new = one2one ? [nil] : [] if new.empty? current[i].concat(new) end else merged = [] these_items.each_with_index do |new,i| new = one2one ? [nil] : [] if new.empty? merged[i] = (current[i] || []) + new end data[key] = merged end elsif type == :flat current = data[key] if merge == :concat current[i].concat these_items else data[key] = current + these_items end end end rescue Exception raise stream.stream_exception if stream.respond_to?(:stream_exception) && stream.stream_exception stream.abort($!) if stream.respond_to?(:abort) raise $! ensure head = head - 1 if head if stream.closed? 
line = nil else line = stream.gets end end end data ensure if stream.respond_to?(:stream_exception) && stream.stream_exception .remove(stream.stream_exception) else .remove end if if stream.respond_to?(:join) eof = begin stream.eof? rescue IOError true end stream.join if eof end end end |
.paste_streams(streams, type: nil, sort: nil, sort_cmd_args: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, one2one: true, field_prefix: nil) ⇒ Object
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 |
# Pastes several TSV streams side by side, aligning rows by key, and returns
# the stream of a TSV::Dumper wrapped with ConcurrentStream.
#
# - Step, Path, TSV::Dumper and TSV inputs are first turned into streams;
#   with +sort+ each stream goes through Open.sort_stream.
# - A TSV::Parser per stream supplies key field, fields, first line,
#   preamble and type; +field_prefix+ prefixes field names per stream.
# - A background thread repeatedly takes the smallest current key
#   (`keys.compact.sort.first`), emits one row joining the values of every
#   stream positioned on that key, and fills `[nil] * sizes[i]` for the
#   others. Consecutive lines with the same key in one stream are zipped
#   together with "|".
# - +all_match+ skips keys that are not present in every stream;
#   +same_fields+ groups values by field position instead of concatenating.
# - presumably the inputs must already be sorted by key unless +sort+ is
#   given; verify against callers.
#
# NOTE(review): this listing has lost some identifiers (an array initialised
# with `=[]` after key_fields, the target of `<< parser.`, and the hash read
# in `type ||= [:type]`), presumably the per-stream parser options; confirm
# against lib/scout/tsv/stream.rb.
# File 'lib/scout/tsv/stream.rb', line 2 def self.paste_streams(streams, type: nil, sort: nil, sort_cmd_args: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, one2one: true, field_prefix: nil) sep = "\t" if sep.nil? streams = streams.collect do |stream| case stream when(defined? Step and Step) stream.stream when Path stream.open when TSV::Dumper stream.stream when TSV stream.dumper_stream else stream end end.compact num_streams = streams.length streams = streams.collect do |stream| Open.sort_stream(stream, memory: sort_memory, cmd_args: sort_cmd_args) end if sort begin lines =[] fields =[] sizes =[] key_fields =[] =[] empty =[] preambles =[] parser_types =[] type ||= :double streams = streams.collect do |stream| parser = TSV::Parser.new stream, sep: sep #parser.type = type sfields = parser.fields if field_prefix index = streams.index stream prefix = field_prefix[index] sfields = sfields.collect{|f|[prefix, f]* ":"} end first_line = parser.first_line first_line = nil if first_line == "" lines << first_line key_fields << parser.key_field fields << sfields sizes << sfields.length if sfields << parser. preambles << parser.preamble if preamble and not parser.preamble.empty? parser_types << parser.type empty << stream if parser.first_line.nil? || parser.first_line.empty? stream end all_fields = fields.dup key_field = key_fields.compact.first if same_fields fields = fields.first else fields = fields.compact.flatten end = .first type ||= [:type] type ||= :list if type == :single type ||= :double if type == :flat preamble_txt = case preamble when TrueClass preambles * "\n" when String if preamble[0]== '+' preambles * "\n" + "\n" + preamble[1..-1] else preamble end else nil end empty_pos = empty.collect{|stream| streams.index stream} keys =[] parts =[] lines.each_with_index do |line,i| if line.nil? || line.empty? 
keys[i]= nil parts[i]= nil else vs = line.split(sep, -1) key, *p = vs p = [p] if parser_types[i] == :flat keys[i]= key parts[i]= p end sizes[i] ||= parts[i].length unless parts[i].nil? end done_streams =[] fields = nil if fields && fields.empty? dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type, compact: !one2one dumper.init(preamble: preamble_txt || !!key_field) t = Thread.new do Thread.report_on_exception = false Thread.current["name"] = "Paste streams" last_min = nil while lines.reject{|line| line.nil?}.any? min = keys.compact.sort.first break if min.nil? new_values =[] skip = all_match && keys.uniq !=[min] keys.each_with_index do |key,i| case key when min new_parts = parts[i] begin line = lines[i]= begin streams[i].gets rescue Log.exception $! nil end if line.nil? keys[i]= nil parts[i]= nil else k, *p = line.chomp.split(sep, -1) p = p.collect{|e| e.nil? ? "" : e } p = [p] if parser_types[i] == :flat if k == keys[i] new_parts = NamedArray.zip_fields([new_parts]).zip(p).collect{|p| [p.flatten * "|"] } raise TryAgain end keys[i]= k parts[i]= p end new_values << new_parts rescue TryAgain keys[i]= nil parts[i]= nil retry end else p = [nil] * sizes[i] new_values << p end end next if skip if same_fields new_values_same = [] new_values.each do |list| list.each_with_index do |l,i| new_values_same[i] ||= [] new_values_same[i] << l end end new_values = new_values_same else new_values = new_values.inject([]){|acc,l| acc.concat l } end new_values = new_values.collect{|l| Array === l ? l.compact : l } unless one2one dumper.add min, new_values end dumper.close streams.each do |stream| stream.close if stream.respond_to?(:close) stream.join if stream.respond_to?(:join) end end rescue Aborted Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}" streams.each do |stream| stream.abort if stream.respond_to? :abort end raise $! 
rescue Exception Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}" streams.each do |stream| stream.abort if stream.respond_to? :abort end raise $! end Thread.pass until t["name"] ConcurrentStream.setup(dumper.stream, threads: [t]) end |
.pos_index(tsv_file, pos_field = nil, key_field: :key, bar: nil, **kwargs) ⇒ Object
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
# File 'lib/scout/tsv/index.rb', line 159
#
# Builds (and persists through Persist.persist) a FixWidthTable of
# [key, position] points taken from +pos_field+ of +tsv_file+.
#
# kwargs prefixed for :persist and :data are pulled out with
# IndiferentHash.pull_keys; the remaining kwargs are passed to TSV.traverse.
# +bar+ set to true is replaced by the log message used as bar description.
#
# Returns the FixWidthTable after add_point and read.
def self.pos_index(tsv_file, pos_field = nil, key_field: :key, bar: nil, **kwargs)
  kwargs = IndiferentHash.add_defaults kwargs, unnamed: true
  type, data_persist = IndiferentHash.process_options kwargs, :type

  prefix = "PositionIndex[#{pos_field}]"
  prefix += select_prefix_str(kwargs[:select])

  persist_options = IndiferentHash.pull_keys kwargs, :persist
  persist_options = IndiferentHash.add_defaults persist_options, :prefix => prefix, :type => :fwt, :persist => true

  data_options = IndiferentHash.pull_keys kwargs, :data

  Persist.persist(tsv_file, persist_options[:type], persist_options.merge(other_options: kwargs.merge(pos_field: pos_field, key_field: key_field))) do |filename|
    # NOTE(review): `! TSV === tsv_file` parses as `(!TSV) === tsv_file`,
    # so this guard is false for any tsv_file other than `false` and the
    # TSV.open call does not run; kept as-is pending a decision on how
    # data_options should be passed.
    tsv_file = TSV.open(tsv_file, *data_options) if data_options[:persist] && ! TSV === tsv_file

    log_msg = "PositionIndex #{Log.fingerprint tsv_file} #{pos_field}"
    Log.low log_msg
    bar = log_msg if TrueClass === bar

    max_key_size = 0
    index_data = []
    TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :flat, cast: :to_i, bar: bar, **kwargs do |key, pos|
      key_size = key.length
      max_key_size = key_size if key_size > max_key_size

      if Array === pos
        pos.each do |p|
          index_data << [key, p]
        end
      else
        index_data << [key, pos]
      end
    end

    filename = :memory if filename.nil?
    index = FixWidthTable.get(filename, max_key_size, false)
    index.add_point index_data
    index.read
    index
  end
end
.process_stream(stream, header_hash: "#", &block) ⇒ Object
211 212 213 214 215 216 217 218 219 |
# File 'lib/scout/tsv/open.rb', line 211
#
# Opens a pipe with Open.open_pipe, copies the leading lines of +stream+
# that start with +header_hash+ into it, then yields the pipe input and the
# first non-header line (nil when the stream ran out) to the caller's block.
# Returns whatever Open.open_pipe returns.
def self.process_stream(stream, header_hash: "#", &block)
  Open.open_pipe do |pipe_in|
    current = stream.gets
    while current && current.start_with?(header_hash)
      pipe_in.puts current
      current = stream.gets
    end
    yield pipe_in, current
  end
end
.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, bar: nil, **kwargs) ⇒ Object
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# File 'lib/scout/tsv/index.rb', line 115
#
# Builds (and persists through Persist.persist) a FixWidthTable of
# [key, [start, end]] ranges taken from +start_field+ and +end_field+ of
# +tsv_file+; positions are converted with to_i.
#
# kwargs prefixed for :persist and :data are pulled out with
# IndiferentHash.pull_keys; the remaining kwargs are passed to TSV.traverse.
# +bar+ set to true is replaced by the log message used as bar description.
#
# Returns the FixWidthTable after add_range and read.
def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, bar: nil, **kwargs)
  kwargs = IndiferentHash.add_defaults kwargs, unnamed: true
  type, data_persist = IndiferentHash.process_options kwargs, :type, :data_persist

  prefix = "RangeIndex[#{start_field}-#{end_field}]"
  prefix += select_prefix_str(kwargs[:select])

  persist_options = IndiferentHash.pull_keys kwargs, :persist
  persist_options = IndiferentHash.add_defaults persist_options, :prefix => prefix, :type => :fwt, :persist => true

  data_options = IndiferentHash.pull_keys kwargs, :data

  Persist.persist(tsv_file, persist_options[:type], persist_options.merge(other_options: kwargs.merge(start_field: start_field, end_field: end_field, key_field: key_field))) do |filename|
    # NOTE(review): `! TSV === tsv_file` parses as `(!TSV) === tsv_file`,
    # so this guard is false for any tsv_file other than `false` and the
    # TSV.open call does not run; kept as-is pending a decision on how
    # data_options should be passed.
    tsv_file = TSV.open(tsv_file, *data_options) if data_options[:persist] && ! TSV === tsv_file

    log_msg = "RangeIndex #{Log.fingerprint tsv_file} #{[start_field, end_field]*"-"}"
    Log.low log_msg
    bar = log_msg if TrueClass === bar

    max_key_size = 0
    index_data = []
    TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], bar: bar, unnamed: true, **kwargs do |key, values|
      key_size = key.length
      max_key_size = key_size if key_size > max_key_size

      start_pos, end_pos = values
      if Array === start_pos
        start_pos.zip(end_pos).each do |s,e|
          index_data << [key, [s.to_i, e.to_i]]
        end
      else
        index_data << [key, [start_pos.to_i, end_pos.to_i]]
      end
    end

    filename = :memory if filename.nil?
    index = FixWidthTable.get(filename, max_key_size, true)
    index.add_range index_data
    index.read
    index
  end
end
.select(key, values, method, fields: nil, field: nil, invert: false, type: nil, sep: nil, &block) ⇒ Object
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/scout/tsv/util/select.rb', line 2
#
# Decides whether an entry (+key+, +values+) matches +method+.
#
# method - nil (the block decides), a Hash ({field => criterion}, optionally
#          with :invert), an Array (any common element), a Regexp, a Symbol
#          (sent to the first candidate), a Numeric (candidate count must be
#          larger), a String (equality, or a "<", "<=", ">", ">=" numeric
#          comparison) or a Proc.
# field  - restricts the candidates to one field (:key for the key); String
#          names are resolved through NamedArray.identify_name when +fields+
#          is given.
# type   - with :double, values are split on +sep+ before matching.
# invert - negates the result.
#
# Returns a truthy or falsy value.
def self.select(key, values, method, fields: nil, field: nil, invert: false, type: nil, sep: nil, &block)
  # +fields+ must travel with the negated call so named fields still resolve
  return ! select(key, values, method, fields: fields, field: field, invert: false, type: type, sep: sep, &block) if invert

  return yield(key, values) if method.nil? && block_given?

  if Hash === method
    if method.include?(:invert)
      method = method.dup
      invert = method.delete(:invert)
      return select(key, values, method, fields: fields, field: field, invert: invert, type: type, sep: sep, &block)
    end
    field = method.keys.first
    value = method[field]
    return select(key, values, value, fields: fields, field: field, invert: invert, type: type, sep: sep, &block)
  end

  if field
    field = NamedArray.identify_name(fields, field) if fields && String === field
    set = field == :key ? [key] : (type == :double ? values[field].split(sep) : values[field])
  else
    set = [key, (type == :double ? values.collect{|v| v.split(sep) } : values)]
  end

  if Array === set
    set.flatten!
  else
    set = [set]
  end

  case method
  when Array
    (method & set).any?
  when Regexp
    set.select{|v| v =~ method }.any?
  when Symbol
    set.first.send(method)
  when Numeric
    set.size > method
  when String
    if block_given?
      # NOTE(review): this branch calls `fields.index?` and reads `key_field`,
      # neither of which is defined in the code visible here; left untouched
      # until the intended behavior is confirmed.
      field = method
      field = fields.index?(field) if fields && String === field
      case
      when block.arity == 1
        if (method == key_field or method == :key)
          yield(key)
        else
          yield(values[method])
        end
      when block.arity == 2
        if (method == key_field or method == :key)
          yield(key, key)
        else
          yield(key, values[method])
        end
      end
    elsif m = method.match(/^([<>]=?)(.*)/)
      set.select{|v| v.to_f.send($1, $2.to_f) }.any?
    else
      set.select{|v| v == method }.any?
    end
  when Proc
    set.select{|v| method.call(v) }.any?
  end
end
.select_prefix_str(select) ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/scout/tsv/index.rb', line 6
#
# Renders a select specification as a "[select:...]" suffix for index names.
# Arrays of pairs and Hashes become "k=v" items joined by ","; a flat Array
# is joined by "=". Returns "" when there is nothing to render or when
# rendering fails (the failure is logged with Log.warn).
def self.select_prefix_str(select)
  description =
    begin
      if Array === select
        head = select.first
        if head.nil?
          nil
        elsif Array === head
          select.collect{|pair| pair * "=" } * ","
        else
          select.collect{|part| part.to_s } * "="
        end
      elsif Hash === select
        select.empty? ? nil : select.collect{|name, criterion| [name.to_s, criterion.to_s] * "=" } * ","
      end
    rescue
      Log.warn "Error in select_prefix_str: #{Log.fingerprint(select)}: #{$!.message}"
      nil
    end

  description.nil? ? "" : "[select:#{description}]"
end
.setup(obj, *rest, &block) ⇒ Object
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/scout/tsv.rb', line 36
#
# Sets up +obj+ as a TSV through original_setup.
#
# When the only extra argument is a String it is parsed with TSV.str2options
# and the resulting options are used. An Array +obj+ is first converted with
# IndiferentHash.array2hash, using [] as default value for :double, :flat,
# :list or unspecified types and nil for :single.
#
# Calls save_annotation_hash on the result when available and returns it.
def setup(obj, *rest, &block)
  if rest.length == 1 && String === rest.first
    options = TSV.str2options(rest.first)
    if Array === obj
      default_value = case options[:type]
                      when :double, :flat, :list, nil
                        []
                      when :single
                        nil
                      end
      obj = IndiferentHash.array2hash(obj, default_value)
    end
    original_setup(obj, options, &block)
  else
    if Array === obj
      options = rest.first if Hash === rest.first
      options ||= {}
      default_value = case options[:type]
                      when :double, :flat, :list, nil
                        []
                      when :single
                        nil
                      end
      obj = IndiferentHash.array2hash(obj, default_value)
    end
    original_setup(obj, *rest, &block)
  end
  obj.save_annotation_hash if obj.respond_to?(:save_annotation_hash)
  obj
end
.str2options(str) ⇒ Object
21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/scout/tsv.rb', line 21
#
# Parses a compact TSV description of the form "Key~Field1,Field2#options".
#
# The part before "#" gives the key field and, after "~", the comma
# separated fields. The part after "#" is parsed with
# IndiferentHash.string2hash; a single bare word is taken as the type.
#
# Returns a Hash with :key_field, :fields and the parsed options.
def self.str2options(str)
  field_options, _sep, rest = str.partition("#")
  key, fields_str = field_options.split("~")

  fields = fields_str.nil? ? [] : fields_str.split(/,\s*/)

  # Build a new string instead of appending to the literal
  rest = ":type=" + rest if rest =~ /^:?\w+$/
  rest_options = rest.nil? ? {} : IndiferentHash.string2hash(rest)

  {:key_field => key, :fields => fields}.merge(rest_options)
end
.str_setup(option_str, obj) ⇒ Object
70 71 72 73 |
# File 'lib/scout/tsv.rb', line 70
#
# Parses +option_str+ with TSV.str2options and sets up +obj+ with the
# resulting options passed as keywords.
def self.str_setup(option_str, obj)
  options = TSV.str2options(option_str)
  setup(obj, **options)
end
.translate(tsv, field, format, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_index: true) ⇒ Object
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
# File 'lib/scout/tsv/change_id/translate.rb', line 116
#
# Translates the identifiers found in +field+ of +tsv+ into +format+ using a
# translation index built from the tsv and its identifier files.
#
# When +field+ is the key field the keys are translated and the key field is
# renamed to +format+; otherwise the matching value column is translated and
# renamed. Returns the TSV::Transformer when +stream+ is set, otherwise the
# TSV produced by it.
def self.translate(tsv, field, format, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_index: true)
  identifiers ||= tsv.identifier_files
  sources = [tsv, identifiers].flatten.compact
  index = translation_index(sources, field, format, persist: persist_index)

  key_field, *fields = TSV.all_fields(tsv)

  new_key_field = field == key_field ? format : key_field
  new_fields = field == key_field ? fields : fields.collect{|name| name == field ? format : name }

  # Position of the translated column, or :key when the key itself changes
  field_pos = new_key_field == key_field ? new_fields.index(format) : :key

  transformer = TSV::Transformer.new tsv
  transformer.key_field = new_key_field
  transformer.fields = new_fields
  transformer.traverse one2one: one2one, unnamed: true do |entry_key, entry_values|
    if field_pos == :key
      [index[entry_key], entry_values]
    else
      entry_values = entry_values.dup
      current = entry_values[field_pos]
      entry_values[field_pos] = Array === current ? index.values_at(*current).compact : index[current]
      [entry_key, entry_values]
    end
  end

  return transformer if stream
  transformer.tsv(merge: merge, one2one: one2one)
end
.translation_index(files, source, target, persist_options = {}) ⇒ Object
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# File 'lib/scout/tsv/change_id/translate.rb', line 49
#
# Builds a persisted single-value index that translates +source+ into
# +target+ using the identifier +files+.
#
# Files that cannot be produced are skipped with a warning. The chain of
# files to go through comes from translation_path; the files are attached in
# sequence and the +target+ column is sliced out.
#
# Returns nil when source and target are the same.
def self.translation_index(files, source, target, persist_options = {})
  return nil if source == target
  persist_options = IndiferentHash.add_defaults persist_options.dup, :persist => true, :prefix => "Translation index"

  file_fields = {}
  files = [files] unless Array === files

  files.each do |file|
    #next if Path === file && ! Open.exist?(file)
    Path.setup file if String === file and not Path === file
    begin
      file = file.produce if Path === file
      raise "Could no produce file" if FalseClass === file
    rescue
      Log.warn $!.message
      next
    end
    file = file.find if Path === file
    file_fields[file] = all_fields(file)
  end

  begin
    path = translation_path(file_fields, source, target)
  rescue
    # NOTE(review): the second attempt repeats the same arguments; confirm
    # whether a different lookup was intended.
    exception = $!
    begin
      path = translation_path(file_fields, source, target)
    rescue
      raise exception
    end
  end

  name = [source || "all", target] * "->" + " (#{files.length} files - #{Misc.digest(files)})"
  second_target = if path.length == 1
                    target
                  else
                    file1, file2 = path.values_at 0, 1
                    pos = NamedArray.identify_name(TSV.all_fields(file1), TSV.all_fields(file2))
                    TSV.all_fields(file1)[pos.compact.first]
                  end

  Persist.persist(name, "HDB", persist_options) do
    index = path.inject(nil) do |acc,file|
      if acc.nil?
        if source.nil?
          if TSV === file
            acc = file.index target: second_target
          else
            acc = TSV.index(file, target: second_target)
          end
        else
          if TSV === file
            acc = (file.key_field == source || source.nil?) ? file.annotate(file.dup) : file.reorder(source)
          else
            acc = TSV.open(file, key_field: source)
          end
        end
      else
        acc = acc.attach file, insitu: false
      end

      acc
    end

    index.slice([target]).to_single
  end
end
.translation_path(file_fields, source, target) ⇒ Object
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/scout/tsv/change_id/translate.rb', line 18
#
# Finds the files to go through to translate +source+ into +target+.
#
# file_fields - Hash of file => list of its fields.
#
# Returns one file when it holds both fields, two files when a source file
# and a target file share a field, or three when a middle file links them.
# With a nil +source+ every file counts as a source file.
# Raises when no such path exists.
def self.translation_path(file_fields, source, target)
  target_files = file_fields.select{|f,fields| identify_field_in_obj(fields, target) }.keys
  source_files =
    if source.nil?
      file_fields.keys
    else
      file_fields.select{|f,fields| identify_field_in_obj(fields, source) }.keys
    end

  if source && (one_step = target_files & source_files).any?
    [one_step.first]
  else
    source_fields = file_fields.values_at(*source_files).flatten
    target_fields = file_fields.values_at(*target_files).flatten
    if (common_fields = source_fields & target_fields).any?
      source_file = source_files.find{|file| (file_fields[file] & common_fields).any? }
      target_file = target_files.find{|file| (file_fields[file] & common_fields).any? }
      [source_file, target_file]
    else
      # A middle file must share a field with both ends
      middle_file, middle_fields = file_fields.find{|f,fields| (fields & source_fields).any? && (fields & target_fields).any? }
      if middle_file
        source_file = source_files.find{|file| (file_fields[file] & middle_fields).any? }
        target_file = target_files.find{|file| (file_fields[file] & middle_fields).any? }
        [source_file, middle_file, target_file]
      else
        raise "Could not traverse identifier path from #{Log.fingerprint source} to #{Log.fingerprint target}. #{file_fields.empty? ? "No identifier files" : Log.fingerprint(file_fields)}"
      end
    end
  end
end
.traverse(*args, **kwargs, &block) ⇒ Object
207 208 209 |
# File 'lib/scout/tsv/open.rb', line 207
#
# Convenience entry point: hands every argument and the block over to
# Open.traverse and returns its result.
def self.traverse(*args, **kwargs, &block)
  Open.traverse(*args, **kwargs, &block)
end
.unzip(source, field, target: nil, sep: ":", delete: true, type: :list, merge: false, one2one: true, bar: nil) ⇒ Object
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/scout/tsv/util/unzip.rb', line 3
#
# Moves the values of +field+ into the key, producing one entry per
# "key<sep>field_value" pair.
#
# target  - :stream (dump through a TSV::Dumper and return the transformer),
#           nil (new TSV) or an existing object to fill.
# delete  - remove +field+ from the values.
# merge   - forces type :double and is passed on to the final tsv call.
# one2one - for :double sources, pair values by position instead of
#           repeating all values for every field value.
# bar     - progress bar setting; true is replaced by a description.
def self.unzip(source, field, target: nil, sep: ":", delete: true, type: :list, merge: false, one2one: true, bar: nil)
  source = TSV::Parser.new source if String === source

  field_pos = source.identify_field(field)
  new_fields = source.fields.dup
  field_name = new_fields[field_pos]
  new_fields.delete_at(field_pos) if delete
  new_key_field = [source.key_field, field_name] * sep
  type = :double if merge

  stream = target == :stream

  target = case target
           when :stream
             TSV::Dumper.new(source.options.merge(sep: "\t"))
           when nil
             TSV.setup({})
           else
             target
           end

  target.fields = new_fields
  target.key_field = new_key_field
  target.type = type

  transformer = TSV::Transformer.new source, target, unnamed: true

  bar = "Unzip #{new_key_field}" if TrueClass === bar

  transformer.traverse unnamed: true, one2one: one2one, bar: bar do |k,v|
    if source.type == :double
      if one2one
        res = NamedArray.zip_fields(v).collect do |_v|
          field_value = _v[field_pos]

          if delete
            new_values = _v.dup
            new_values.delete_at field_pos
          else
            new_values = _v
          end

          new_key = [k,field_value] * sep
          new_values = new_values.collect{|e| [e] } if transformer.type == :double
          [new_key, new_values]
        end
      else
        all_values = v.collect{|e| e.dup }
        all_values.delete_at field_pos if delete

        res = NamedArray.zip_fields(v).collect do |_v|
          field_value = _v[field_pos]

          new_key = [k,field_value] * sep
          new_values = all_values if transformer.type == :double
          [new_key, new_values]
        end
      end

      MultipleResult.setup(res)
    else
      field_value = v[field_pos]

      if delete
        new_values = v.dup
        new_values.delete_at field_pos
      else
        new_values = v
      end

      new_key = [k,field_value] * sep
      new_values = new_values.collect{|e| [e] } if transformer.type == :double
      [new_key, new_values]
    end
  end

  stream ? transformer : transformer.tsv(merge: merge)
end
Instance Method Details
#[](key, *rest) ⇒ Object
57 58 59 60 61 |
# File 'lib/scout/tsv/util.rb', line 57
#
# Looks up +key+ through super and, for Array values of a named, non-flat
# TSV, sets the value up as a NamedArray with the TSV fields and the key.
def [](key, *rest)
  value = super(key, *rest)
  named = !@unnamed && @type != :flat && Array === value
  NamedArray.setup(value, @fields, key) if named
  value
end
#add_field(name = nil) ⇒ Object
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# File 'lib/scout/tsv/util/process.rb', line 46
#
# Appends a value computed by the block (called with key and current values)
# to every entry, and adds +name+ to the fields when both are present.
# For :double TSVs non-Array results are wrapped in an Array (nil becomes
# []); for :single TSVs the result replaces the value. Returns self.
def add_field(name = nil)
  keys.each do |key|
    current = self[key]
    addition = yield(key, current)
    addition = [addition].compact if type == :double and not Array === addition

    updated =
      if type == :single
        addition
      elsif current.nil? && (fields.nil? || fields.empty?)
        [addition]
      elsif current.nil?
        # Pad the missing entry with one nil per existing field
        [nil] * fields.length + [addition]
      elsif Array === current
        current + [addition]
      else
        current << addition
      end

    self[key] = updated
  end

  unless fields.nil? || name.nil?
    self.fields = self.fields + [name]
  end

  self
end
#all_fields ⇒ Object
150 151 152 153 |
# File 'lib/scout/tsv/util.rb', line 150
#
# Returns the key field followed by the fields, or [] when no fields are set.
def all_fields
  @fields.nil? ? [] : [@key_field] + @fields
end
#attach(*args, **kwargs) ⇒ Object
228 229 230 |
# File 'lib/scout/tsv/attach.rb', line 228
#
# Instance shortcut for TSV.attach with this object as the source.
def attach(*args, **kwargs)
  TSV.attach(self, *args, **kwargs)
end
#change_id(*args, **kwargs) ⇒ Object
43 44 45 |
# File 'lib/scout/tsv/change_id.rb', line 43
#
# Instance shortcut for TSV.change_id with this object as the source.
def change_id(*args, **kwargs)
  TSV.change_id(self, *args, **kwargs)
end
#change_key(*args, **kwargs) ⇒ Object
29 30 31 |
# File 'lib/scout/tsv/change_id.rb', line 29
#
# Instance shortcut for TSV.change_key with this object as the source.
def change_key(*args, **kwargs)
  TSV.change_key(self, *args, **kwargs)
end
#chunked_values_at(keys, max = 5000) ⇒ Object
272 273 274 275 276 277 278 |
# File 'lib/scout/tsv/util/select.rb', line 272
#
# Same result as values_at for +keys+, but fetched in chunks of at most
# +max+ keys (split with Misc.ordered_divide). While the result is still
# empty, a chunk that responds to annotate is asked to annotate it.
def chunked_values_at(keys, max = 5000)
  Misc.ordered_divide(keys, max).each_with_object([]) do |chunk, collected|
    found = self.values_at(*chunk)
    found.annotate collected if found.respond_to?(:annotate) && collected.empty?
    collected.concat(found)
  end
end
#collapse_stream(*args, **kwargs, &block) ⇒ Object
229 230 231 |
# File 'lib/scout/tsv/open.rb', line 229
#
# Instance shortcut for TSV.collapse_stream applied to this object's
# dumper_stream.
def collapse_stream(*args, **kwargs, &block)
  TSV.collapse_stream(self.dumper_stream, *args, **kwargs, &block)
end
#collect(*args, &block) ⇒ Object
98 99 100 101 102 103 104 105 106 107 108 |
# File 'lib/scout/tsv/util.rb', line 98
#
# With a block, returns an Array with the block result for every key/value
# pair yielded by each; without a block, defers to super.
def collect(*args, &block)
  return super(*args) unless block_given?

  results = []
  each { |k, v| results << yield(k, v) }
  results
end
#column(field, **kwargs) ⇒ Object
47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/scout/tsv/util/reorder.rb', line 47
#
# Slices out a single +field+. The :type passed to slice is always :flat for
# :double and :flat TSVs and :single otherwise, overriding any :type given.
def column(field, **kwargs)
  multi_valued = type == :double || type == :flat
  kwargs[:type] = multi_valued ? :flat : :single
  slice(field, **kwargs)
end
#digest_str ⇒ Object
171 172 173 |
# File 'lib/scout/tsv/util.rb', line 171
#
# String used for digests: the Log.fingerprint of all fields, keys and
# values, wrapped as "TSV:{fields;keys;values}".
def digest_str
  fields_part = Log.fingerprint(self.all_fields || [])
  keys_part = Log.fingerprint(self.keys)
  values_part = Log.fingerprint(self.values)
  "TSV:{" + fields_part << ";" << keys_part << ";" << values_part << "}"
end
#dumper_stream(options = {}) ⇒ Object Also known as: stream
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
# File 'lib/scout/tsv/dumper.rb', line 149
#
# Dumps this TSV through a TSV::Dumper and returns the resulting stream.
#
# options - :preamble (default true), :unmerge (default false; only honoured
#           for :double TSVs, :expand repeats single values to the longest
#           list), :keys (dump only these keys, in this order) and :stream
#           (write into this stream instead of a new one). All options are
#           also merged into the dumper's annotation hash.
#
# Without :stream the dump runs in a thread attached to the returned stream
# with ConcurrentStream; with :stream it runs inline. Errors abort the dumper.
def dumper_stream(options = {})
  preamble, unmerge, keys, stream = IndiferentHash.process_options options,
    :preamble, :unmerge, :keys, :stream,
    :preamble => true, :unmerge => false

  unmerge = false unless @type === :double

  dumper = TSV::Dumper.new self.annotation_hash.merge(options)
  dump_entry = Proc.new do |k,value_list|
    if unmerge
      max = value_list.collect{|v| v.length}.max

      if unmerge == :expand and max > 1
        value_list = value_list.collect do |values|
          if values.length == 1
            [values.first] * max
          else
            values
          end
        end
      end

      NamedArray.zip_fields(value_list).each do |values|
        dumper.add k, values
      end
    else
      dumper.add k, value_list
    end
  end

  # Writes preamble and entries, then closes; shared by both modes below
  dump_all = lambda do
    dumper.init(preamble: preamble)
    if keys
      keys.each do |k|
        dump_entry.call k, self[k]
      end
    else
      self.each &dump_entry
    end
    dumper.close
  end

  self.with_unnamed do
    if stream.nil?
      t = Thread.new do
        begin
          Thread.current.report_on_exception = true
          Thread.current["name"] = "Dumper thread"
          dump_all.call
        rescue
          dumper.abort($!)
        end
      end
      # Wait until the thread has started before handing out the stream
      Thread.pass until t["name"]
      stream = dumper.stream
      ConcurrentStream.setup(stream, :threads => [t])
      stream
    else
      dumper.set_stream stream
      begin
        dump_all.call
      rescue
        dumper.abort($!)
      end
      stream
    end
  end
end
#each(*args, &block) ⇒ Object
86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/scout/tsv/util.rb', line 86
#
# Iterates through super. With a block, Array values of a non-flat TSV are
# set up as NamedArray with the TSV fields before being yielded, unless
# @unnamed is set; an unset (nil) @unnamed counts as unnamed.
def each(*args, &block)
  return super(*args) unless block_given?

  skip_naming = @unnamed.nil? ? true : @unnamed
  super(*args) do |k, v|
    NamedArray.setup(v, @fields) if !skip_naming && @type != :flat && Array === v
    block.call(k, v)
  end
end
#filter(filter_dir = nil) ⇒ Object
285 286 287 288 289 290 |
# File 'lib/scout/tsv/util/filter.rb', line 285
#
# Extends this object with Filtered, records +filter_dir+ and starts with an
# empty list of filters. Returns self.
def filter(filter_dir = nil)
  extend Filtered
  self.filter_dir = filter_dir
  self.filters = []
  self
end
#fingerprint ⇒ Object
167 168 169 |
# File 'lib/scout/tsv/util.rb', line 167
#
# Short description: the Log.fingerprint of all fields and of the keys,
# wrapped as "TSV:{fields;keys}".
def fingerprint
  fields_part = Log.fingerprint(self.all_fields || [])
  keys_part = Log.fingerprint(self.keys)
  "TSV:{" + fields_part << ";" << keys_part << "}"
end
#head(max = 10) ⇒ Object
180 181 182 183 184 185 186 187 188 189 190 |
# File 'lib/scout/tsv/transformer.rb', line 180
#
# Returns a new annotated TSV filled, through a Transformer, with the
# entries traversed before the counter exceeds +max+.
def head(max=10)
  first_entries = self.annotate({})
  transformer = Transformer.new self, first_entries
  seen = 0
  transformer.traverse do |k, v|
    seen += 1
    break if seen > max
    [k, v]
  end
  first_entries
end
#identifier_files ⇒ Object
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 |
# File 'lib/scout/tsv/attach.rb', line 232
#
# Lists the identifier sources of this TSV as an Array.
#
# Explicit +identifiers+ win: a TSV is wrapped in an Array, an Array of TSVs
# (or an empty one) is returned as is, and other entries are turned into
# Paths. Otherwise the identifiers of the directory holding +filename+ are
# used (only existing ones when filename is already a Path). Returns [] when
# neither is available.
def identifier_files
  if identifiers && TSV === identifiers
    [identifiers]
  elsif identifiers && Array === identifiers
    if TSV === identifiers.first || identifiers.empty?
      identifiers
    else
      identifiers.collect{|f| Path === f ? f : Path.setup(f)}
    end
  elsif identifiers
    [ Path === identifiers ? identifiers : Path.setup(identifiers) ]
  elsif Path === filename
    path_files = filename.dirname.identifiers
    [path_files].flatten.compact.select{|f| f.exists?}
  elsif filename
    [Path.setup(filename.dup).dirname.identifiers]
  else
    []
  end
end
#identify_field(name, strict: nil) ⇒ Object
53 54 55 |
# File 'lib/scout/tsv/util.rb', line 53
#
# Resolves +name+ against this TSV's key field and fields through
# TSV.identify_field.
def identify_field(name, strict: nil)
  TSV.identify_field(@key_field, @fields, name, strict: strict)
end
#index(*args, **kwargs, &block) ⇒ Object
111 112 113 |
# File 'lib/scout/tsv/index.rb', line 111
#
# Instance shortcut for TSV.index with this object as the source.
def index(*args, **kwargs, &block)
  TSV.index(self, *args, **kwargs, &block)
end
#inspect ⇒ Object
175 176 177 |
# File 'lib/scout/tsv/util.rb', line 175
#
# Uses the fingerprint as the inspect representation.
def inspect
  fingerprint
end
#melt_columns(value_field, column_field) ⇒ Object
2 3 4 5 6 7 8 9 10 11 12 |
# File 'lib/scout/tsv/util/melt.rb', line 2
#
# Melts the TSV into one row per (entry, column): each row is keyed
# "<key>:<column position>" and holds the original key, the value and the
# column name. The new TSV is a :list with key field "ID" and fields
# [key_field, value_field, column_field].
def melt_columns(value_field, column_field)
  melted = TSV.setup({}, :key_field => "ID", :fields => [key_field, value_field, column_field], :type => :list, :cast => cast)
  each do |k, values|
    values.zip(fields).each_with_index do |(value, column), position|
      melted["#{k}:#{position}"] = [k, value, column]
    end
  end
  melted
end
#merge(other) ⇒ Object
179 180 181 |
# File 'lib/scout/tsv/util.rb', line 179
#
# Merges +other+ through super and annotates the result with this TSV's
# annotations.
def merge(other)
  merged = super(other)
  self.annotate(merged)
end
#merge_zip(other) ⇒ Object
183 184 185 186 187 |
# File 'lib/scout/tsv/util.rb', line 183
#
# Adds every entry of +other+ to this TSV with zip_new. Returns what
# other.each returns.
def merge_zip(other)
  other.each { |k, v| self.zip_new k, v }
end
#options ⇒ Object
63 64 65 |
# File 'lib/scout/tsv/util.rb', line 63
#
# The options of this TSV: its annotation hash.
def options
  annotation_hash
end
#page(pnum, psize, field = nil, just_keys = false, reverse = false, &block) ⇒ Object
156 157 158 159 160 161 162 163 164 165 166 167 168 |
# File 'lib/scout/tsv/util/sort.rb', line 156
#
# Returns page +pnum+ (1-based) of +psize+ entries after sorting by +field+
# (the key when nil or "key"), optionally reversed. With +just_keys+ the
# page's keys are returned; otherwise the entries selected by those keys.
def page(pnum, psize, field = nil, just_keys = false, reverse = false, &block)
  first = psize * (pnum - 1)
  last = psize * pnum - 1
  field = :key if field == "key"

  sorted = sort_by(field || :key, true, &block)
  sorted.reverse! if reverse

  window = sorted[first..last]
  just_keys ? window : select(:key => window)
end
#pos_index(*args, **kwargs, &block) ⇒ Object
206 207 208 |
# File 'lib/scout/tsv/index.rb', line 206
#
# Delegates to TSV.pos_index, passing this object as the first argument
# followed by every positional argument, keyword argument and block received.
#
# @return whatever TSV.pos_index returns
def pos_index(*args, **kwargs, &block)
  forwarded = [self, *args]
  TSV.pos_index(*forwarded, **kwargs, &block)
end
#prepare_entity ⇒ Object
4 5 6 |
# File 'lib/scout/tsv/util/sort.rb', line 4
#
# Forwards every argument (positional, keyword and block) unchanged to
# Entity.prepare_entity.
#
# @return whatever Entity.prepare_entity returns
def prepare_entity(...)
  Entity.prepare_entity(...)
end
#process(field, &block) ⇒ Object
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/scout/tsv/util/process.rb', line 2
#
# Rewrites the values of one field by running every row through the block.
#
# For :single and :flat tables the block receives the whole row value and its
# result replaces the row. Otherwise it receives the value at +field+'s
# position; rows whose value is nil are skipped.
#
# The block may take 1, 2 or 3 arguments: (field_values),
# (field_values, key) or (field_values, key, row_values).
#
# Fix: the original guard read "! NamedArray === values", which Ruby parses
# as "(!NamedArray) === values", i.e. "false === values", so the in-place
# branch could never run. It is now "!(NamedArray === values)".
#
# @param field field reference resolved through #identify_field
# @return [self]
# @raise [RuntimeError] when the block arity is not 1, 2 or 3
def process(field, &block)
  field_pos = identify_field field

  through do |key, values|
    whole_row = type == :single || type == :flat

    if whole_row
      field_values = values
    else
      next if values.nil?
      field_values = values[field_pos]
    end

    new_values = case block.arity
                 when 1
                   yield(field_values)
                 when 2
                   yield(field_values, key)
                 when 3
                   yield(field_values, key, values)
                 else
                   raise "Unexpected arity in block, must be 1, 2 or 3: #{block.arity}"
                 end

    if whole_row
      self[key] = new_values
    else
      current = values[field_pos]
      same_kind = (String === current && String === new_values) ||
                  (Array === current && Array === new_values)

      # Update the existing String/Array in place when possible so the object
      # stored in the row keeps its identity; otherwise swap the slot.
      if !current.frozen? && !(NamedArray === values) && same_kind
        current.replace new_values
      else
        values[field_pos] = new_values
      end
      self[key] = values
    end
  end

  self
end
#range_index(*args, **kwargs, &block) ⇒ Object
202 203 204 |
# File 'lib/scout/tsv/index.rb', line 202
#
# Delegates to TSV.range_index, passing this object as the first argument
# followed by every positional argument, keyword argument and block received.
#
# @return whatever TSV.range_index returns
def range_index(*args, **kwargs, &block)
  forwarded = [self, *args]
  TSV.range_index(*forwarded, **kwargs, &block)
end
#remove_duplicates(pivot = 0) ⇒ Object
76 77 78 79 80 81 82 |
# File 'lib/scout/tsv/util/process.rb', line 76
#
# Builds a copy in which, for every key, repeated zipped entries are removed:
# the values are zipped with NamedArray.zip_fields, de-duplicated with #uniq
# and zipped back.
#
# @param pivot accepted but not used by this implementation
# @return the annotated copy
def remove_duplicates(pivot = 0)
  deduplicated = annotate({})

  through do |key, values|
    distinct_entries = NamedArray.zip_fields(values).uniq
    deduplicated[key] = NamedArray.zip_fields(distinct_entries)
  end

  deduplicated
end
#reorder(key_field = nil, fields = nil, merge: true, one2one: true, data: nil, unnamed: true, **kwargs) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/scout/tsv/util/reorder.rb', line 4
#
# Re-keys and/or re-selects fields by traversing the table and collecting the
# yielded rows into +data+ (or into a fresh annotated copy).
#
# When the same key is yielded more than once:
# * :double results with +merge+ set combine the columns; with
#   merge == :concat they are concatenated in place, otherwise a new row is
#   built;
# * :flat results are concatenated (in place for :concat);
# * anything else is overwritten by the latest row.
#
# Fix: in the non-:concat merge an empty incoming column used to leave its
# slot unset, so the values already stored for that column were replaced by
# nil (or dropped from the end of the row). The existing column is now kept.
#
# @param key_field forwarded to #traverse
# @param fields forwarded to #traverse
# @param merge true, false or :concat (see above)
# @param one2one forwarded to #traverse through +kwargs+
# @param data optional target to fill instead of a new annotated copy
# @param unnamed passed to #with_unnamed around the traversal
# @param kwargs forwarded to #traverse; :type also sets the result type
# @return the filled target, with key_field and fields set from the traversal
def reorder(key_field = nil, fields = nil, merge: true, one2one: true, data: nil, unnamed: true, **kwargs)
  res = data || self.annotate({})
  res.type = kwargs[:type] if kwargs.include?(:type)
  kwargs[:one2one] = one2one

  key_field_name, field_names = with_unnamed unnamed do
    traverse key_field, fields, **kwargs do |k, v|
      if res.type == :double && merge && res.include?(k)
        current = res[k]
        if merge == :concat
          v.each_with_index do |new, i|
            next if new.empty?
            current[i].concat(new)
          end
        else
          # Keep what is already stored when the incoming column is empty.
          res[k] = v.each_with_index.collect do |new, i|
            new.empty? ? current[i] : current[i] + new
          end
        end
      elsif res.type == :flat
        if merge == :concat
          res[k] ||= []
          res[k].concat v
        else
          res[k] = res[k].nil? ? v : res[k] + v
        end
      else
        res[k] = v
      end
    end
  end

  res.key_field = key_field_name
  res.fields = field_names
  res
end
#reset_filters ⇒ Object
292 293 294 295 296 297 298 299 300 301 |
# File 'lib/scout/tsv/util/filter.rb', line 292
#
# Clears the state of the active filters.
#
# Without a filter directory (@filter_dir nil or empty) every filter in
# @filters is reset, provided @filters is an Array, and nil is returned.
# With a filter directory every "*.filter" file inside it is deleted.
def reset_filters
  unless @filter_dir.nil? or @filter_dir.empty?
    pattern = File.join(@filter_dir, '*.filter')
    return Dir.glob(pattern).each { |filter_file| FileUtils.rm filter_file }
  end

  @filters.each { |filter| filter.reset } if Array === @filters
  nil
end
#select(method = nil, invert = false, &block) ⇒ Object
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 |
# Returns a new TSV (same key_field, fields, type, filename and identifiers,
# then annotated from self) holding the entries that satisfy a criterion.
# +invert+ is XOR-ed with the match result, so true keeps the non-matching
# entries instead.
#
# The criterion is chosen by the class of +method+:
# * nil with a block  - the block is called with (key, values).
# * Array             - converted to a Set; an entry matches when its key or
#                       any of its values is in the set.
# * Regexp            - matches when the key or any flattened value matches.
# * String / Symbol   - with a block, names the field (or the key) whose value
#                       is passed to the block; without a block, matches when
#                       the key or any value equals +method+.
# * Hash              - only the first pair is used, as field => criterion.
#                       The criterion may be an Array, a Regexp, a
#                       "name:..." String, a String starting with
#                       <, <=, > or >= (numeric comparison), any other String
#                       (equality), a Numeric (minimum number of values) or a
#                       Proc.
#
# NOTE(review): the numeric comparison branch (String matching
# /^([<>]=?)(.*)/) does not apply +invert+ - confirm whether that is intended.
# NOTE(review): with a String/Symbol +method+ and a block whose arity is
# neither 1 nor 2, no branch runs and the result is empty.
# NOTE(review): the arity-2 branch does not raise when the field cannot be
# identified, unlike the arity-1 branch.
# File 'lib/scout/tsv/util/select.rb', line 68 def select(method = nil, invert = false, &block) new = TSV.setup({}, :key_field => key_field, :fields => fields, :type => type, :filename => filename, :identifiers => identifiers) self.annotate(new) case when (method.nil? and block_given?) through do |key, values| new[key] = values if invert ^ (yield key, values) end when Array === method method = Set.new method with_unnamed do case type when :single through do |key, value| new[key] = value if invert ^ (method.include? key or method.include? value) end when :list, :flat through do |key, values| new[key] = values if invert ^ (method.include? key or (method & values).length > 0) end else through do |key, values| new[key] = values if invert ^ (method.include? key or (method & values.flatten).length > 0) end end end when Regexp === method with_unnamed do through do |key, values| new[key] = values if invert ^ ([key,values].flatten.select{|v| v =~ method}.any?) end end when ((String === method) || (Symbol === method)) if block_given? case when block.arity == 1 with_unnamed do case when (method == key_field or method == :key) through do |key, values| new[key] = values if invert ^ (yield(key)) end when (type == :single or type == :flat) through do |key, value| new[key] = value if invert ^ (yield(value)) end else pos = identify_field method raise "Field #{ method } not identified. Available: #{ fields * ", " }" if pos.nil? 
 through do |key, values| new[key] = values if invert ^ (yield(values[pos])) end end end when block.arity == 2 with_unnamed do case when (method == key_field or method == :key) through do |key, values| new[key] = values if invert ^ (yield(key, key)) end when (type == :single or type == :flat) through do |key, value| new[key] = value if invert ^ (yield(key, value)) end else pos = identify_field method through do |key, values| new[key] = values if invert ^ (yield(key, values[pos])) end end end end else with_unnamed do through do |key, values| new[key] = values if invert ^ ([key,values].flatten.select{|v| v == method}.any?) end end end when Hash === method key = method.keys.first method = method.values.first case when ((Array === method) and (key == :key or key_field == key)) with_unnamed do if invert keys.each do |key| new[key] = self[key] unless method.include?(key) end else method.each do |key| new[key] = self[key] if self.include?(key) end end end when Array === method with_unnamed do method = Set.new method unless Set === method case type when :single through :key, key do |key, value| new[key] = self[key] if invert ^ (method.include? value) end when :list through :key, key do |key, values| new[key] = self[key] if invert ^ (method.include? values.first) end when :flat #untested through :key, key do |key, values| new[key] = self[key] if invert ^ ((method & values.flatten).any?) end else through :key, key do |key, values| new[key] = self[key] if invert ^ ((method & values.flatten).any?) end end end when Regexp === method with_unnamed do through :key, key do |key, values| values = [values] if type == :single new[key] = self[key] if invert ^ (values.flatten.select{|v| v =~ method}.any?) 
 end end when ((String === method) and (method =~ /name:(.*)/)) name = $1 old_unnamed = self.unnamed self.unnamed = false if name.strip =~ /^\/(.*)\/$/ regexp = Regexp.new $1 through :key, key do |key, values| case type when :single values = values.annotate([values]) when :double values = values[0] end new[key] = self[key] if invert ^ (values.select{|v| v.name =~ regexp}.any?) end else through :key, key do |key, values| case type when :single values = values.annotate([values]) when :double values = values[0] end new[key] = self[key] if invert ^ (values.select{|v| v.name == name}.any?) end end self.unnamed = old_unnamed when String === method if method =~ /^([<>]=?)(.*)/ with_unnamed do through :key, key do |key, values| value = Array === values ? values.flatten.first : values new[key] = self[key] if value.to_f.send($1, $2.to_f) end end else with_unnamed do through :key, key do |key, values| values = [values] if type == :single new[key] = self[key] if invert ^ (values.flatten.select{|v| v == method}.length > 0) end end end when Numeric === method with_unnamed do through :key, key do |key, values| new[key] = self[key] if invert ^ (values.flatten.length >= method) end end when Proc === method with_unnamed do through :key, key do |key, values| values = [values] if type == :single new[key] = self[key] if invert ^ (values.flatten.select{|v| method.call(v)}.length > 0) end end end end new end |
#slice(fields, **kwargs) ⇒ Object
43 44 45 |
# File 'lib/scout/tsv/util/reorder.rb', line 43
#
# Keeps the current key and restricts the table to +fields+ by calling
# #reorder with :key as the key field.
#
# @param fields the fields to keep, as accepted by #reorder
# @param kwargs forwarded to #reorder
# @return whatever #reorder returns
def slice(fields, **kwargs)
  reorder(:key, fields, **kwargs)
end
#sort(field = nil, just_keys = false, &block) ⇒ Object
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
# File 'lib/scout/tsv/util/sort.rb', line 83
#
# Sorts the entries, optionally by the first value of one field.
#
# * field nil / :all - entries are ordered by key.
# * any other field  - entries are ordered by that field's value (first
#   element for :list, :flat and :double rows). Empty or nil values sort
#   first; arrays of length 1 are compared by their element, longer arrays by
#   their length.
# * with a block     - the block is the comparator and receives
#   [key, value] pairs.
#
# Fixes over the rendered original:
# * the key-ordering branch tested "fields == :all" (the table's field list)
#   instead of the +field+ argument, so it could never run;
# * "x = a.nil? or (...)" assigned only "a.nil?" because "=" binds tighter
#   than "or"; the emptiness test is now part of the assignment;
# * the stripped "(options || {})" argument to #prepare_entity is restored,
#   and entity preparation is skipped when @unnamed in every branch.
#
# @param field field to sort by, or nil / :all to sort by key
# @param just_keys return only the sorted keys when true
# @return [Array] sorted keys, or sorted [key, values] pairs
def sort(field = nil, just_keys = false, &block)
  field = :all if field.nil?

  if field == :all
    elems = collect
  else
    elems = []
    case type
    when :single
      through(:key, field) { |key, value| elems << [key, value] }
    when :list, :flat, :double
      through(:key, field) { |key, values| elems << [key, values[0]] }
    end
  end

  if block_given?
    sorted = elems.sort(&block)
  elsif field == :all
    sorted = elems.sort_by { |key, _value| key }
    # Key ordering returns the collected pairs themselves.
    return sorted unless just_keys
  else
    sorted = elems.sort do |a, b|
      a_value = a.last
      b_value = b.last

      a_empty = a_value.nil? || (a_value.respond_to?(:empty?) && a_value.empty?)
      b_empty = b_value.nil? || (b_value.respond_to?(:empty?) && b_value.empty?)

      if a_empty && b_empty
        0
      elsif a_empty
        -1
      elsif b_empty
        1
      elsif Array === a_value
        if a_value.length == 1 && b_value.length == 1
          a_value.first <=> b_value.first
        else
          a_value.length <=> b_value.length
        end
      else
        a_value <=> b_value
      end
    end
  end

  if just_keys
    keys = sorted.collect { |key, _value| key }
    keys = prepare_entity(keys, key_field, (options || {}).merge(:dup_array => true)) unless @unnamed
    keys
  else
    sorted.collect { |key, _value| [key, self[key]] }
  end
end
#sort_by(field = nil, just_keys = false, &block) ⇒ Object
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/scout/tsv/util/sort.rb', line 8
#
# Sorts the entries, optionally by the first value of one field, using
# Enumerable#sort_by semantics for the block.
#
# * field nil / :all - entries are ordered by key.
# * field :key       - for :list, :flat and :double rows the key itself is
#   the sort value.
# * any other field  - entries are ordered by that field's value (first
#   element for :list, :flat and :double rows). Empty or nil values sort
#   first; arrays of length 1 are compared by their element, longer arrays by
#   their length.
# * with a block     - the block maps each [key, value] pair to its sort key.
#
# Fixes over the rendered original:
# * the key-ordering branch tested "fields == :all" (the table's field list)
#   instead of the +field+ argument, so it could never run, and it returned
#   nil for just_keys when @unnamed was set;
# * "x = a.nil? or (...)" assigned only "a.nil?" because "=" binds tighter
#   than "or"; the emptiness test is now part of the assignment;
# * the stripped "(options || {})" argument to #prepare_entity is restored.
#
# @param field field to sort by, or nil / :all to sort by key
# @param just_keys return only the sorted keys when true
# @return [Array] sorted keys, or sorted [key, values] pairs
def sort_by(field = nil, just_keys = false, &block)
  field = :all if field.nil?

  if field == :all
    elems = collect
  else
    elems = []
    case type
    when :single
      through(:key, field) { |key, value| elems << [key, value] }
    when :list, :flat, :double
      through(:key, field) do |key, value|
        elems << [key, field == :key ? key : value[0]]
      end
    end
  end

  if block_given?
    sorted = elems.sort_by(&block)
  elsif field == :all
    sorted = elems.sort_by { |key, _value| key }
    # Key ordering returns the collected pairs themselves.
    return sorted unless just_keys
  else
    sorted = elems.sort do |a, b|
      a_value = a.last
      b_value = b.last

      a_empty = a_value.nil? || (a_value.respond_to?(:empty?) && a_value.empty?)
      b_empty = b_value.nil? || (b_value.respond_to?(:empty?) && b_value.empty?)

      if a_empty && b_empty
        0
      elsif a_empty
        -1
      elsif b_empty
        1
      elsif Array === a_value
        if a_value.length == 1 && b_value.length == 1
          a_value.first <=> b_value.first
        else
          a_value.length <=> b_value.length
        end
      else
        a_value <=> b_value
      end
    end
  end

  if just_keys
    keys = sorted.collect { |key, _value| key }
    keys = prepare_entity(keys, key_field, (options || {}).merge(:dup_array => true)) unless @unnamed
    keys
  else
    sorted.collect { |key, _value| [key, self[key]] }
  end
end
#subset(keys) ⇒ Object
262 263 264 265 266 267 268 269 270 |
# File 'lib/scout/tsv/util/select.rb', line 262
#
# Builds an annotated copy holding only the entries whose key is listed in
# +keys+. Keys that are not present are ignored; entries are inserted in the
# order of +keys+. Lookups run inside #with_unnamed.
#
# @param keys the keys to keep
# @return the annotated copy
def subset(keys)
  picked = annotate({})

  with_unnamed do
    keys.each { |k| picked[k] = self[k] if include?(k) }
  end

  picked
end
#summary ⇒ Object
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/scout/tsv/util.rb', line 121
#
# Builds a multi-line, human-readable description: filename, key field,
# fields, type, size, namespace, identifiers and the first entry as example.
#
# The rendered listing had turned the heredoc into an escaped string literal
# (with the closing "EOF" and "end" keywords inside it); it is reconstructed
# here from that escaped text. Leading whitespace inside the heredoc was not
# recoverable - NOTE(review): confirm against the real source file.
#
# @return [String]
def summary
  key = nil
  values = nil
  # Only the first entry is needed, as the example row.
  self.each do |k, v|
    key = k
    values = v
    break
  end

  filename = @filename
  filename = "No filename" if filename.nil? || String === filename && filename.empty?
  # NOTE(review): the result of #find is discarded here; presumably the call
  # is made for a side effect or the assignment is missing - confirm.
  filename.find if Path === filename
  filename = File.basename(filename) + " [" + File.basename(persistence_path) + "]" if respond_to?(:persistence_path) and persistence_path

  with_unnamed do
    <<-EOF
Filename = #{filename}
Key field = #{key_field || "*No key field*"}
Fields = #{fields ? Log.fingerprint(fields) : "*No field info*"}
Type = #{type}
Size = #{size}
namespace = #{Log.fingerprint namespace}
identifiers = #{Log.fingerprint identifiers}
Example:
- #{key} -- #{Log.fingerprint values }
    EOF
  end
end
#to_double ⇒ Object
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
# File 'lib/scout/tsv/transformer.rb', line 136
#
# Converts the table to :double layout through a Transformer.
#
# Returns self untouched when the type is already :double. Otherwise each row
# is wrapped according to the current type: :single => [[v]],
# :list => every value wrapped in its own array, :flat => [v].
#
# @return self, or the annotated copy filled by the transformer
def to_double
  return self if type == :double

  res = annotate({})
  with_unnamed do
    transformer = Transformer.new(self, res)
    transformer.type = :double
    transformer.traverse do |key, value|
      current_type = type
      if current_type == :single
        [key, [[value]]]
      elsif current_type == :list
        [key, value.collect { |item| [item] }]
      elsif current_type == :flat
        [key, [value]]
      end
    end
  end
  res
end
#to_flat ⇒ Object
169 170 171 172 173 174 175 176 177 178 |
# File 'lib/scout/tsv/transformer.rb', line 169
#
# Converts the table to :flat layout through a Transformer: array values are
# flattened, anything else is wrapped in a one-element array.
#
# @return the annotated copy filled by the transformer
def to_flat
  res = annotate({})
  transformer = Transformer.new(self, res)
  transformer.type = :flat
  transformer.traverse do |key, value|
    [key, Array === value ? value.flatten : [value]]
  end
  res
end
#to_hash ⇒ Object
141 142 143 |
# File 'lib/scout/tsv.rb', line 141
#
# Returns a duplicate of this object (the result of #dup).
def to_hash
  dup
end
#to_list ⇒ Object
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
# File 'lib/scout/tsv/transformer.rb', line 117
#
# Converts the table to :list layout through a Transformer.
#
# Rows are mapped according to the current type: :single => [v],
# :double => the first element of every column, :flat => the first value
# only (as a one-element slice).
#
# @return the annotated copy filled by the transformer
def to_list
  res = annotate({})
  with_unnamed do
    transformer = Transformer.new(self, res)
    transformer.type = :list
    transformer.traverse do |key, value|
      current_type = type
      if current_type == :single
        [key, [value]]
      elsif current_type == :double
        [key, value.collect { |column| column.first }]
      elsif current_type == :flat
        [key, value.slice(0, 1)]
      end
    end
  end
  res
end
#to_s(options = {}) ⇒ Object
224 225 226 |
# File 'lib/scout/tsv/dumper.rb', line 224
#
# Renders the table by calling #dumper_stream with an empty-string stream
# merged with the caller's options (caller options win on conflict).
#
# The rendered listing had lost the parameter name ("def to_s( = {})" and
# ".merge()"), which is not valid Ruby; it is restored to +options+ as shown
# in the documented signature.
#
# @param options [Hash] extra options for #dumper_stream
# @return whatever #dumper_stream returns
def to_s(options = {})
  dumper_stream({stream: ''}.merge(options))
end
#to_single ⇒ Object
157 158 159 160 161 162 163 164 165 166 167 |
# File 'lib/scout/tsv/transformer.rb', line 157
#
# Converts the table to :single layout through an unnamed Transformer: each
# value is reduced to its first element repeatedly until it is no longer an
# Array.
#
# @return the annotated copy filled by the transformer
def to_single
  res = annotate({})
  transformer = Transformer.new(self, res)
  transformer.type = :single
  transformer.unnamed = true
  transformer.traverse do |key, value|
    scalar = value
    while Array === scalar
      scalar = scalar.first
    end
    [key, scalar]
  end
  res
end
#translate(*args, **kwargs) ⇒ Object
152 153 154 |
# File 'lib/scout/tsv/change_id/translate.rb', line 152
#
# Delegates to TSV.translate, passing this object as the first argument
# followed by every positional and keyword argument received.
#
# @return whatever TSV.translate returns
def translate(*args, **kwargs)
  forwarded = [self, *args]
  TSV.translate(*forwarded, **kwargs)
end
#transpose(key_field = "Unkown ID") ⇒ Object
81 82 83 84 85 86 87 88 89 90 |
# File 'lib/scout/tsv/util/reorder.rb', line 81
#
# Transposes the table, dispatching on its type: :single and :flat tables are
# first converted with #to_list, :list tables use #transpose_list and
# :double tables use #transpose_double. Any other type yields nil.
#
# @param key_field name for the key field of the transposed table
def transpose(key_field = "Unkown ID")
  layout = type

  if layout == :single || layout == :flat
    to_list.transpose_list(key_field)
  elsif layout == :list
    transpose_list(key_field)
  elsif layout == :double
    transpose_double(key_field)
  end
end
#transpose_double(key_field = "Unkown ID") ⇒ Object
74 75 76 77 78 79 |
# File 'lib/scout/tsv/util/reorder.rb', line 74
#
# Transposes a :double table by going through a :list intermediate: values
# are meant to be joined with a randomly generated separator, transposed with
# #transpose_list and split again by #to_double.
#
# NOTE(review): this relies on #to_list and #to_double using the blocks given
# to them - confirm that they do.
#
# @param key_field name for the key field of the transposed table
def transpose_double(key_field = "Unkown ID")
  sep = "-!SEP--#{rand 10000}!-"
  joined = to_list { |v| v * sep }
  transposed = joined.transpose_list(key_field)
  transposed.to_double { |v| v.split(sep) }
end
#transpose_list(key_field = "Unkown ID") ⇒ Object
59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/scout/tsv/util/reorder.rb', line 59
#
# Transposes a table whose rows are plain value lists: the current keys
# become the new fields and every current field becomes a key whose row is
# the matching column of values.
#
# @param key_field name for the key field of the transposed table
# @return the annotated copy, set up with the new key field and fields
def transpose_list(key_field="Unkown ID")
  column_names = keys.dup

  transposed = annotate({})
  TSV.setup(transposed, :key_field => key_field, :fields => column_names, :type => type, :filename => filename, :identifiers => identifiers)

  # Matrix does the row/column swap.
  columns = Matrix.rows(values).transpose.to_a
  fields.zip(columns) { |name, column| transposed[name] = column }

  transposed
end
#traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed: nil, key_field: nil, fields: nil, bar: false, cast: nil, select: nil, uniq: false, &block) ⇒ Object Also known as: through
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
# Iterates over the entries, yielding (key, values) to the block after
# optionally re-keying by another field (+key_field+ / +key_field_pos+),
# restricting or reordering the yielded fields (+fields+ / +fields_pos+),
# casting values (+cast+), filtering entries (+select+) and converting rows
# between the table's own @type and the requested +type+.
#
# When the chosen key field holds several values the block is called once per
# key value; with +one2one+ on a :double table the values at the matching
# position are paired with each key.
#
# Returns [key_name, field_names] describing what was yielded.
#
# NOTE(review): this rendered listing is missing several identifiers (for
# example the left-hand sides around "= "Traverse ...", the method name in
# "Log::ProgressBar.(self, )", the block parameter in "do ||", the receiver
# of ".tick" and the last argument of "NamedArray.setup(values, fields, )"),
# so it is not runnable as shown. Consult the real source file before relying
# on the details.
# File 'lib/scout/tsv/traverse.rb', line 3 def traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed: nil, key_field: nil, fields: nil, bar: false, cast: nil, select: nil, uniq: false, &block) key_field = key_field_pos if key_field.nil? fields = fields_pos.dup if fields.nil? type = @type if type.nil? key_pos = self.identify_field(key_field) fields = self.all_fields if fields == :all fields = [fields] unless fields.nil? || Array === fields positions = (fields.nil? || fields == :all) ? nil : self.identify_field(fields) positions = nil if fields == self.fields unnamed = @unnamed if unnamed.nil? unnamed = false if unnamed.nil? if key_pos == :key key_name = @key_field else key_name = @fields[key_pos] if positions.nil? positions = (0..@fields.length-1).to_a positions.delete_at key_pos positions.unshift :key end end fields = positions.collect{|p| p == :key ? self.key_field : self.fields[p] } if positions if positions.nil? && key_pos == :key field_names = @fields.dup elsif positions.nil? && key_pos != :key field_names = @fields.dup field_names.delete_at key_pos unless fields == :all elsif positions.include?(:key) field_names = positions.collect{|p| p == :key ? @key_field : @fields[p] } else field_names = @fields.values_at *positions end key_index = positions.index :key if positions positions.delete :key if positions = "Traverse #{Log.fingerprint self}" Log.debug = if TrueClass === invert = select.delete :invert if Hash === select type_swap_tag = [type.to_s, @type.to_s] * "_" Log::ProgressBar.(self, ) do || with_unnamed unnamed do each do |key,values| next unless TSV.select key, values, select, invert: invert if select .tick if values = [values] if @type == :single if positions.nil? if key_pos != :key values = values.dup if @type == :flat key = values else key = values.delete_at(key_pos) end end else orig_key = key key = @type == :flat ? 
 values : values[key_pos] if key_pos != :key values = values.values_at(*positions) if key_index if @type == :double values.insert key_index, [orig_key] else values.insert key_index, orig_key end end end if ! unnamed && fields case @type when :flat, :single values = Entity.prepare_entity(values, fields.first) else values = NamedArray.setup(values, fields, ) end end values = TSV.cast_value(values, cast) if cast if Array === key key = key.uniq if uniq if @type == :double && one2one if one2one == :strict key.each_with_index do |key_i,i| if type == :double v_i = values.collect{|v| [v[i]] } else v_i = values.collect{|v| v[i] } end yield key_i, v_i end else key.each_with_index do |key_i,i| if type == :double v_i = values.collect{|v| [v[i] || v.first] } else v_i = values.collect{|v| v[i] || v.first } end yield key_i, v_i, @fields end end else key.each_with_index do |key_i, i| if type == :double yield key_i, values elsif type == :list yield key_i, values.collect{|v| v[i] } elsif type == :flat yield key_i, values.flatten elsif type == :single yield key_i, values.first end end end else if type == @type if type == :single yield key, values.first else yield key, values end else case type_swap_tag when "double_list" yield key, values.collect{|v| [v] } when "double_flat" yield key, [values] when "double_single" yield key, [values] when "list_double" yield key, values.collect{|v| v.first } when "list_flat" yield key, [values.first] when "list_single" yield key, values when "flat_double" yield key, values.flatten when "flat_list" yield key, values.flatten when "flat_single" yield key, values when "single_double" yield key, values.flatten.first when "single_list" yield key, values.first when "single_flat" yield key, values.first end end end end end end [key_name, field_names] end |
#unzip(*args, **kwargs) ⇒ Object
83 84 85 |
# File 'lib/scout/tsv/util/unzip.rb', line 83
#
# Delegates to TSV.unzip, passing this object as the first argument followed
# by every positional and keyword argument received.
#
# @return whatever TSV.unzip returns
def unzip(*args, **kwargs)
  forwarded = [self, *args]
  TSV.unzip(*forwarded, **kwargs)
end
#unzip_replicates ⇒ Object
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/scout/tsv/util/unzip.rb', line 87
#
# Splits every row of a :double table into one :list row per zipped entry.
# The n-th entry of key "k" is stored under "k(n)".
#
# @return the annotated result, with its type set to :list
# @raise [RuntimeError] when the table is not :double
def unzip_replicates
  raise "Can only unzip replicates in :double TSVs" unless type == :double

  replicates = {}

  with_unnamed do
    through do |key, columns|
      NamedArray.zip_fields(columns).each_with_index do |entry, position|
        replicates[key + "(#{position})"] = entry
      end
    end
  end

  annotate(replicates)
  replicates.type = :list

  replicates
end
#with_filters(filters, &block) ⇒ Object
303 304 305 306 307 308 309 310 |
# File 'lib/scout/tsv/util/filter.rb', line 303
#
# Runs the block with the given filters applied, then resets them.
#
# Calls #filter, adds every (field, value) pair with #add_filter, yields, and
# always calls #reset_filters afterwards, even when the block raises.
#
# Fix: the original accepted a block but never yielded to it, so the filters
# were added and reset straight away without the block ever running.
#
# @param filters enumerable of (field, value) pairs
# @return the block's value, or the result of +filters.each+ when no block is
#   given (the previous behavior)
def with_filters(filters, &block)
  filter
  begin
    applied = filters.each { |field, value| add_filter field, value }
    block_given? ? yield : applied
  ensure
    reset_filters
  end
end
#with_unnamed(unnamed = nil) ⇒ Object
110 111 112 113 114 115 116 117 118 119 |
# File 'lib/scout/tsv/util.rb', line 110
#
# Runs the block with @unnamed temporarily set, restoring the previous value
# afterwards even when the block raises.
#
# @param unnamed value to use during the block; nil means true
# @return the block's value
def with_unnamed(unnamed = nil)
  requested = unnamed.nil? ? true : unnamed
  previous = @unnamed
  @unnamed = requested
  yield
ensure
  @unnamed = previous
end
#write_file(file) ⇒ Object
230 231 232 233 234 |
# File 'lib/scout/tsv/dumper.rb', line 230
#
# Dumps the table into +file+: opens it through Open.open in 'w' mode and
# hands the resulting stream to #dumper_stream.
#
# @param file destination passed to Open.open
# @return whatever Open.open returns
def write_file(file)
  Open.open(file, mode: 'w') { |stream| dumper_stream(stream: stream) }
end
#zip(merge = false, field = "New Field", sep = ":") ⇒ Object
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# File 'lib/scout/tsv/util/unzip.rb', line 105
#
# Folds a suffix of each key back into the row as an extra trailing field.
#
# Each key is split on +sep+; the first part becomes the new key and the
# second part the value of the added field. With +merge+ the result is
# :double and rows sharing a new key are concatenated column by column (the
# added value is repeated once per element of the first column); without it a
# later row overwrites an earlier one.
#
# When both key_field and fields are set, the new key field is the part of
# the current one before +sep+ and +field+ is appended to the fields.
#
# @param merge combine rows that share a new key
# @param field name of the added field
# @param sep separator used in keys and in the key field name
# @return the annotated result
def zip(merge = false, field = "New Field", sep = ":")
  zipped = {}
  annotate zipped
  zipped.type = :double if merge

  zipped.with_unnamed do
    through do |key, values|
      base_key, replicate = key.split(sep)

      unless merge
        zipped[base_key] = values + [replicate]
        next
      end

      extended = values + [[replicate] * values.first.length]
      if zipped.include? base_key
        zipped[base_key].each_with_index do |column, position|
          column.concat(extended[position])
        end
      else
        zipped[base_key] = extended
      end
    end
  end

  if key_field and fields
    zipped.key_field = key_field.partition(sep).first
    zipped.fields = zipped.fields + [field]
  end

  zipped
end
#zip_new(key, values, insitu: :lax) ⇒ Object
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/scout/tsv/util.rb', line 67
#
# Adds +values+ to the entry stored under +key+, zipping them onto the
# existing columns with NamedArray.add_zipped when the key is already present.
#
# +values+ is normalised so every element is an Array, unless its first
# element already is one.
#
# @param key the entry to extend or create
# @param values the new column values
# @param insitu :lax (default) zips onto the stored row and reassigns it;
#   any other truthy value zips onto the stored row without reassigning and
#   stores a copy of +values+ for new keys; a falsy value zips onto a
#   duplicate of the stored row
def zip_new(key, values, insitu: :lax)
  values = values.collect { |v| Array === v ? v : [v] } unless Array === values.first

  existing = self[key]
  unless existing
    return self[key] = (insitu && insitu != :lax) ? values.dup : values
  end

  if !insitu
    self[key] = NamedArray.add_zipped(existing.dup, values)
  elsif insitu == :lax
    self[key] = NamedArray.add_zipped(existing, values)
  else
    NamedArray.add_zipped(existing, values)
  end
end