Class: GoodData::Datawarehouse
- Inherits:
-
Object
- Object
- GoodData::Datawarehouse
- Defined in:
- lib/gooddata_datawarehouse/version.rb,
lib/gooddata_datawarehouse/datawarehouse.rb
Constant Summary collapse
- VERSION =
"0.0.7"- PARALEL_COPY_THREAD_COUNT =
10
Instance Method Summary collapse
- #connect ⇒ Object
- #create_table(name, columns, opts = {}) ⇒ Object
-
#create_table_from_csv_header(table_name, csv_path, opts = {}) ⇒ Object
returns a list of columns created does nothing if file empty, returns [].
- #csv_to_new_table(table_name, csvs, opts = {}) ⇒ Object
- #drop_table(table_name, opts = {}) ⇒ Object
-
#execute(sql_strings) ⇒ Object
execute sql, return nothing.
-
#execute_select(sql, opts = {}) ⇒ Object
executes sql (select), for each row, passes execution to block.
- #export_table(table_name, csv_path) ⇒ Object
- #get_columns(table_name) ⇒ Object
- #init_file(given_filename, key, csv_path, single_file) ⇒ Object
-
#initialize(username, password, instance_id, opts = {}) ⇒ Datawarehouse
constructor
A new instance of Datawarehouse.
- #load_data_from_csv(table_name, csvs, opts = {}) ⇒ Object
- #rename_table(old_name, new_name) ⇒ Object
- #table_exists?(name) ⇒ Boolean
- #table_row_count(table_name) ⇒ Object
- #truncate_table(table_name) ⇒ Object
Constructor Details
#initialize(username, password, instance_id, opts = {}) ⇒ Datawarehouse
Returns a new instance of Datawarehouse.
13 14 15 16 17 18 19 20 21 22 23 24 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 13 def initialize(username, password, instance_id, opts={}) @logger = Logger.new(STDOUT) @username = username @password = password @jdbc_url = "jdbc:dss://secure.gooddata.com/gdc/dss/instances/#{instance_id}" if @username.nil? || @password.nil? || instance_id.nil? fail ArgumentError, "username, password and/or instance_id are nil. All of them are mandatory." end Jdbc::DSS.load_driver Java.com.gooddata.dss.jdbc.driver.DssDriver end |
Instance Method Details
#connect ⇒ Object
188 189 190 191 192 193 194 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 188 def connect Sequel.connect @jdbc_url, :username => @username, :password => @password do |connection| yield(connection) end end |
#create_table(name, columns, opts = {}) ⇒ Object
126 127 128 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 126 def create_table(name, columns, opts={}) execute(GoodData::SQLGenerator.create_table(name, columns, opts)) end |
#create_table_from_csv_header(table_name, csv_path, opts = {}) ⇒ Object
returns a list of columns created does nothing if file empty, returns []
119 120 121 122 123 124 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 119 def create_table_from_csv_header(table_name, csv_path, opts={}) # take the header as a list of columns columns = get_csv_headers(csv_path) create_table(table_name, columns, opts) unless columns.empty? columns end |
#csv_to_new_table(table_name, csvs, opts = {}) ⇒ Object
57 58 59 60 61 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 57 def csv_to_new_table(table_name, csvs, opts={}) csv_list = list_files(csvs) cols = create_table_from_csv_header(table_name, csv_list[0], opts) load_data_from_csv(table_name, csv_list, opts.merge(columns: cols, append: true)) end |
#drop_table(table_name, opts = {}) ⇒ Object
53 54 55 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 53 def drop_table(table_name, opts={}) execute(GoodData::SQLGenerator.drop_table(table_name,opts)) end |
#execute(sql_strings) ⇒ Object
execute sql, return nothing
144 145 146 147 148 149 150 151 152 153 154 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 144 def execute(sql_strings) if ! sql_strings.kind_of?(Array) sql_strings = [sql_strings] end connect do |connection| sql_strings.each do |sql| @logger.info("Executing sql: #{sql}") if @logger connection.run(sql) end end end |
#execute_select(sql, opts = {}) ⇒ Object
executes sql (select), for each row, passes execution to block
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 157 def execute_select(sql, opts={}) fetch_handler = opts[:fetch_handler] count = opts[:count] connect do |connection| # do the query f = connection.fetch(sql) @logger.info("Executing sql: #{sql}") if @logger # if handler was passed call it if fetch_handler fetch_handler.call(f) end if count return f.first[:count] end # if block given yield to process line by line if block_given? # go through the rows returned and call the block return f.each do |row| yield(row) end end # return it all at once f.map{|h| h} end end |
#export_table(table_name, csv_path) ⇒ Object
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 26 def export_table(table_name, csv_path) CSV.open(csv_path, 'wb', :force_quotes => true) do |csv| # get the names of cols cols = get_columns(table_name).map {|c| c[:column_name]} # write header csv << cols # get the keys for columns, stupid sequel col_keys = nil rows = execute_select(GoodData::SQLGenerator.select_all(table_name, limit: 1)) col_keys = rows[0].keys execute_select(GoodData::SQLGenerator.select_all(table_name)) do |row| # go through the table write to csv csv << row.values_at(*col_keys) end end @logger.info "Table #{table_name} exported to #{csv_path.respond_to?(:path)? csv_path.path : csv_path}" csv_path end |
#get_columns(table_name) ⇒ Object
139 140 141 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 139 def get_columns(table_name) res = execute_select(GoodData::SQLGenerator.get_columns(table_name)) end |
#init_file(given_filename, key, csv_path, single_file) ⇒ Object
106 107 108 109 110 111 112 113 114 115 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 106 def init_file(given_filename, key, csv_path, single_file) # only use file postfix if there are multiple files postfix = single_file ? '' : "-#{File.basename(csv_path)}" # take what we have and put the source csv name at the end given_filename = given_filename.path if given_filename.is_a?(File) f = "#{given_filename || Tempfile.new(key).path}#{postfix}" f = File.new(f, 'w') unless f.is_a?(File) f end |
#load_data_from_csv(table_name, csvs, opts = {}) ⇒ Object
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 67 def load_data_from_csv(table_name, csvs, opts={}) thread_count = opts[:paralel_copy_thread_count] || PARALEL_COPY_THREAD_COUNT # get the list of files to load and columns in the csv csv_list = list_files(csvs) columns = opts[:columns] || get_csv_headers(csv_list[0]) # truncate_table unless data should be appended unless opts[:append] truncate_table(table_name) end # load each csv from the list single_file = (csv_list.size == 1) csv_list.peach(thread_count) do |csv_path| if opts[:ignore_parse_errors] && opts[:exceptions_file].nil? && opts[:rejections_file].nil? exc = nil rej = nil opts_file = opts else opts_file = opts.clone # priradit do opts i do exc - # temporary files to get the excepted records (if not given) exc = opts_file[:exceptions_file] = init_file(opts_file[:exceptions_file], 'exceptions', csv_path, single_file) rej = opts_file[:rejections_file] = init_file(opts_file[:rejections_file], 'rejections', csv_path, single_file) end # execute the load execute(GoodData::SQLGenerator.load_data(table_name, csv_path, columns, opts_file)) exc.close if exc rej.close if rej # if there was something rejected and it shouldn't be ignored, raise an error if ((exc && File.size?(exc)) || (rej && File.size?(rej))) && (! opts[:ignore_parse_errors]) fail ArgumentError, "Some lines in the CSV didn't go through. Exceptions: #{IO.read(exc)}\nRejected records: #{IO.read(rej)}" end end end |
#rename_table(old_name, new_name) ⇒ Object
49 50 51 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 49 def rename_table(old_name, new_name) execute(GoodData::SQLGenerator.rename_table(old_name, new_name)) end |
#table_exists?(name) ⇒ Boolean
130 131 132 133 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 130 def table_exists?(name) count = execute_select(GoodData::SQLGenerator.get_table_count(name), :count => true) count > 0 end |
#table_row_count(table_name) ⇒ Object
135 136 137 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 135 def table_row_count(table_name) execute_select(GoodData::SQLGenerator.get_row_count(table_name), :count => true) end |
#truncate_table(table_name) ⇒ Object
63 64 65 |
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 63 def truncate_table(table_name) execute(GoodData::SQLGenerator.truncate_table(table_name)) end |