313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
|
# File 'lib/rust/core/types/dataframe.rb', line 313
# Inner-joins this data frame with +other+ on the columns listed in +by+.
#
# Every row of +other+ is paired with every row of this data frame that has
# the same values in the +by+ columns; rows of +other+ without a match are
# left out. Output rows follow the iteration order of +other+ and, for keys
# that occur several times in this data frame, the order of this data frame.
#
# The result has the +by+ columns first (values taken from this data frame),
# then the remaining columns of this data frame prefixed with
# "<first_alias>.", then the remaining columns of +other+ prefixed with
# "<second_alias>.". An empty alias means "no prefix".
#
# other::        the DataFrame to join with.
# by::           Array of String column names present in both data frames.
# first_alias::  prefix for the non-key columns of this data frame.
# second_alias:: prefix for the non-key columns of +other+.
#
# Raises TypeError if +other+ is not a DataFrame or +by+ is not an Array of
# Strings. Raises RuntimeError if a +by+ column is missing from either data
# frame, if both aliases are the same non-empty value, or if both aliases
# are empty and the non-key columns would overlap.
#
# Returns a new DataFrame; neither input is modified by this method.
def merge(other, by, first_alias = "x", second_alias = "y")
  raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
  raise TypeError, "Expected list of strings" unless by.is_a?(Array) && by.all? { |e| e.is_a?(String) }
  raise "This dataset should have all the columns in #{by}" unless (by & self.column_names).size == by.size
  raise "The passed dataset should have all the columns in #{by}" unless (by & other.column_names).size == by.size

  merged_column_self = self.column_names - by
  merged_column_other = other.column_names - by

  if first_alias == second_alias
    # Identical aliases are only acceptable when both are empty (no prefixes),
    # and then only if the unprefixed column names cannot collide.
    raise "The aliases can not have the same value" unless first_alias == ""

    intersection = merged_column_self & merged_column_other
    raise "Cannot merge because the following columns would overlap: #{intersection}" if intersection.size > 0
  end

  # Index this data frame by key. Each key maps to ALL row indices carrying
  # it, so rows sharing a key are not silently dropped from the join.
  my_keys = {}
  self.each_with_index do |row, i|
    key = by.map { |colname| row[colname] }
    (my_keys[key] ||= []) << i
  end

  first_prefix = first_alias.length > 0 ? "#{first_alias}." : ""
  second_prefix = second_alias.length > 0 ? "#{second_alias}." : ""

  # Pair each source column with its (possibly prefixed) name in the result.
  self_targets = merged_column_self.map { |colname| [colname, "#{first_prefix}#{colname}"] }
  other_targets = merged_column_other.map { |colname| [colname, "#{second_prefix}#{colname}"] }

  result = DataFrame.new(by + self_targets.map(&:last) + other_targets.map(&:last))

  other.each do |other_row|
    matching_indices = my_keys[by.map { |colname| other_row[colname] }]
    next unless matching_indices

    matching_indices.each do |my_row_index|
      my_row = self.row(my_row_index)

      to_add = {}
      by.each { |colname| to_add[colname] = my_row[colname] }
      self_targets.each { |source, target| to_add[target] = my_row[source] }
      other_targets.each { |source, target| to_add[target] = other_row[source] }

      result << to_add
    end
  end

  result
end
|