Class: Daru::Core::MergeFrame
- Inherits: Object
- Inheritance hierarchy: Object → Daru::Core::MergeFrame
- Defined in:
- lib/daru/core/merge.rb
Instance Method Summary
- #initialize(df1, df2, on: nil) ⇒ MergeFrame (constructor): A new instance of MergeFrame.
- #inner(_opts) ⇒ Object
- #left(_opts) ⇒ Object
- #merge_join(left: true, right: true) ⇒ Object
- #outer(_opts) ⇒ Object
- #right(_opts) ⇒ Object
Constructor Details
#initialize(df1, df2, on: nil) ⇒ MergeFrame
Returns a new instance of MergeFrame.
78 79 80 81 82 |
# File 'lib/daru/core/merge.rb', line 78
# Stores the two frames to be joined and the join specification.
#
# @param df1 [Object] left-hand frame, kept in @df1
# @param df2 [Object] right-hand frame, kept in @df2
# @param on [Object, nil] join specification, kept in @on
#   (presumably the column name(s) to join on -- confirm against callers)
def initialize(df1, df2, on: nil)
  @df1 = df1
  @df2 = df2
  @on  = on
end
Instance Method Details
#inner(_opts) ⇒ Object
84 85 86 |
# File 'lib/daru/core/merge.rb', line 84
# Inner join: delegates to #merge_join with both unmatched sides disabled,
# so only rows whose keys match in both frames are emitted.
#
# @param _opts [Object] accepted for interface compatibility; not used
# @return [Object] whatever #merge_join returns
def inner(_opts)
  merge_join(left: false, right: false)
end
#left(_opts) ⇒ Object
88 89 90 |
# File 'lib/daru/core/merge.rb', line 88
# Left join: delegates to #merge_join keeping unmatched rows from the
# left-hand side only.
#
# @param _opts [Object] accepted for interface compatibility; not used
# @return [Object] whatever #merge_join returns
def left(_opts)
  merge_join(left: true, right: false)
end
#merge_join(left: true, right: true) ⇒ Object
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
# File 'lib/daru/core/merge.rb', line 100
# Sort-merge join of the rows in +df1_array+ and +df2_array+.
#
# Both row arrays are sorted in place by their first element (row[0],
# presumably the join key derived from @on -- confirm against the helpers
# that build the arrays), with nil keys ordered first. The two sorted arrays
# are then walked in lock-step:
#
# * equal keys  -> every right row with that key is paired with the left row;
# * left first  -> the left row is emitted unmatched when +left+ is true;
# * right first -> the right row is emitted unmatched when +right+ is true.
#
# The same ordering is used for sorting and for deciding which side to
# advance, so nil keys on either side neither raise nor cause later matches
# to be skipped.
#
# @param left [Boolean] emit left rows that have no match on the right
# @param right [Boolean] emit right rows that have no match on the left
# @return [Daru::DataFrame] frame built from +joined_hash+
# @raise [ArgumentError] when two non-nil keys cannot be ordered against
#   each other (e.g. an Integer key on one side and a String on the other)
def merge_join(left: true, right: true)
  MergeHelper.verify_dataframes df1_hash, df2_hash, @on
  MergeHelper.resolve_duplicates df1_hash, df2_hash, @on

  # Single source of truth for key ordering: nil sorts before everything.
  rank = ->(key) { [key.nil? ? 0 : 1, key] }

  # True when key +a+ sorts at or before key +b+ under +rank+.
  left_first = lambda do |a, b|
    order = rank.call(a) <=> rank.call(b)
    raise ArgumentError, "comparison of #{a.class} with #{b.class} failed" if order.nil?

    order <= 0
  end

  # TODO: Use native dataframe sorting.
  # It would be ideal to reuse sorting functionality that is native
  # to dataframes. Unfortunately, native dataframe sort introduces
  # an overhead that reduces join performance by a factor of 4! Until
  # that aspect is improved, we resort to a simpler array sort.
  df1_array.sort_by! { |row| rank.call(row[0]) }
  df2_array.sort_by! { |row| rank.call(row[0]) }

  size1 = @df1.size
  size2 = @df2.size
  idx1 = 0
  idx2 = 0

  while idx1 < size1 || idx2 < size2
    more1 = idx1 < size1
    more2 = idx2 < size2
    # Reset keys explicitly so an exhausted side never leaks a stale key.
    key1 = more1 ? df1_array[idx1][0] : nil
    key2 = more2 ? df2_array[idx2][0] : nil

    if more1 && more2 && key1 == key2
      idx2_start = idx2

      while idx2 < size2 && key1 == df2_array[idx2][0]
        add_merge_row_to_hash([df1_array[idx1], df2_array[idx2]], joined_hash)
        idx2 += 1
      end

      # If the next left row carries the same key, rewind the right cursor
      # so that row is paired with the same run of right rows.
      idx2 = idx2_start if idx1 + 1 < size1 && key1 == df1_array[idx1 + 1][0]
      idx1 += 1
    elsif !more2 || (more1 && left_first.call(key1, key2))
      add_merge_row_to_hash([df1_array[idx1], nil], joined_hash) if left
      idx1 += 1
    else
      # Loop condition guarantees the right side still has rows here.
      add_merge_row_to_hash([nil, df2_array[idx2]], joined_hash) if right
      idx2 += 1
    end
  end

  Daru::DataFrame.new(joined_hash, order: joined_hash.keys)
end
#outer(_opts) ⇒ Object
96 97 98 |
# File 'lib/daru/core/merge.rb', line 96
# Full outer join: delegates to #merge_join keeping unmatched rows from
# both sides.
#
# @param _opts [Object] accepted for interface compatibility; not used
# @return [Object] whatever #merge_join returns
def outer(_opts)
  merge_join(left: true, right: true)
end
#right(_opts) ⇒ Object
92 93 94 |
# File 'lib/daru/core/merge.rb', line 92
# Right join: delegates to #merge_join keeping unmatched rows from the
# right-hand side only.
#
# @param _opts [Object] accepted for interface compatibility; not used
# @return [Object] whatever #merge_join returns
def right(_opts)
  merge_join(left: false, right: true)
end