Class: GoodData::Datawarehouse

Inherits:
Object
  • Object
show all
Defined in:
lib/gooddata_datawarehouse/version.rb,
lib/gooddata_datawarehouse/datawarehouse.rb

Constant Summary collapse

VERSION =
"0.0.8"
PARALEL_COPY_THREAD_COUNT =
10

Instance Method Summary collapse

Constructor Details

#initialize(username, password, instance_id, opts = {}) ⇒ Datawarehouse

Returns a new instance of Datawarehouse.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 13

def initialize(username, password, instance_id, opts={})
  @logger = Logger.new(STDOUT)
  @username = username
  @password = password
  @jdbc_url = opts[:jdbc_url] || "jdbc:dss://secure.gooddata.com/gdc/dss/instances/#{instance_id}"
  if @username.nil? || @password.nil?
    fail ArgumentError, "username and/or password are nil. All of them are mandatory."
  end

  if instance_id.nil? && opts[:jdbc_url].nil?
    fail ArgumentError, "you must either provide instance_id or jdbc_url option."
  end

  Jdbc::DSS.load_driver
  Java.com.gooddata.dss.jdbc.driver.DssDriver
end

Instance Method Details

#connectObject



192
193
194
195
196
197
198
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 192

def connect
  Sequel.connect @jdbc_url,
    :username => @username,
    :password => @password do |connection|
      yield(connection)
  end
end

#create_table(name, columns, opts = {}) ⇒ Object



130
131
132
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 130

def create_table(name, columns, opts={})
  execute(GoodData::SQLGenerator.create_table(name, columns, opts))
end

#create_table_from_csv_header(table_name, csv_path, opts = {}) ⇒ Object

returns a list of columns created does nothing if file empty, returns []



123
124
125
126
127
128
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 123

def create_table_from_csv_header(table_name, csv_path, opts={})
  # take the header as a list of columns
  columns = get_csv_headers(csv_path)
  create_table(table_name, columns, opts) unless columns.empty?
  columns
end

#csv_to_new_table(table_name, csvs, opts = {}) ⇒ Object



61
62
63
64
65
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 61

def csv_to_new_table(table_name, csvs, opts={})
  csv_list = list_files(csvs)
  cols = create_table_from_csv_header(table_name, csv_list[0], opts)
  load_data_from_csv(table_name, csv_list, opts.merge(columns: cols, append: true))
end

#drop_table(table_name, opts = {}) ⇒ Object



57
58
59
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 57

def drop_table(table_name, opts={})
  execute(GoodData::SQLGenerator.drop_table(table_name,opts))
end

#execute(sql_strings) ⇒ Object

execute sql, return nothing



148
149
150
151
152
153
154
155
156
157
158
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 148

def execute(sql_strings)
  if ! sql_strings.kind_of?(Array)
    sql_strings = [sql_strings]
  end
  connect do |connection|
    sql_strings.each do |sql|
      @logger.info("Executing sql: #{sql}") if @logger
      connection.run(sql)
    end
  end
end

#execute_select(sql, opts = {}) ⇒ Object

executes sql (select), for each row, passes execution to block



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 161

def execute_select(sql, opts={})
  fetch_handler = opts[:fetch_handler]
  count = opts[:count]

  connect do |connection|
    # do the query
    f = connection.fetch(sql)

    @logger.info("Executing sql: #{sql}") if @logger
    # if handler was passed call it
    if fetch_handler
      fetch_handler.call(f)
    end

    if count
      return f.first[:count]
    end

    # if block given yield to process line by line
    if block_given?
      # go through the rows returned and call the block
      return f.each do |row|
        yield(row)
      end
    end

    # return it all at once
    f.map{|h| h}
  end
end

#export_table(table_name, csv_path) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 30

def export_table(table_name, csv_path)
  CSV.open(csv_path, 'wb', :force_quotes => true) do |csv|
    # get the names of cols
    cols = get_columns(table_name).map {|c| c[:column_name]}

    # write header
    csv << cols

    # get the keys for columns, stupid sequel
    col_keys = nil
    rows = execute_select(GoodData::SQLGenerator.select_all(table_name, limit: 1))

    col_keys = rows[0].keys

    execute_select(GoodData::SQLGenerator.select_all(table_name)) do |row|
      # go through the table write to csv
      csv << row.values_at(*col_keys)
    end
  end
  @logger.info "Table #{table_name} exported to #{csv_path.respond_to?(:path)? csv_path.path : csv_path}"
  csv_path
end

#get_columns(table_name) ⇒ Object



143
144
145
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 143

def get_columns(table_name)
  res = execute_select(GoodData::SQLGenerator.get_columns(table_name))
end

#init_file(given_filename, key, csv_path, single_file) ⇒ Object



110
111
112
113
114
115
116
117
118
119
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 110

def init_file(given_filename, key, csv_path, single_file)
  # only use file postfix if there are multiple files
  postfix = single_file ? '' : "-#{File.basename(csv_path)}"

  # take what we have and put the source csv name at the end
  given_filename = given_filename.path if given_filename.is_a?(File)
  f = "#{given_filename || Tempfile.new(key).path}#{postfix}"
  f = File.new(f, 'w') unless f.is_a?(File)
  f
end

#load_data_from_csv(table_name, csvs, opts = {}) ⇒ Object



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 71

def load_data_from_csv(table_name, csvs, opts={})
  thread_count = opts[:paralel_copy_thread_count] || PARALEL_COPY_THREAD_COUNT
  # get the list of files to load and columns in the csv
  csv_list = list_files(csvs)
  columns = opts[:columns] || get_csv_headers(csv_list[0])

  # truncate_table unless data should be appended
  unless opts[:append]
    truncate_table(table_name)
  end

  # load each csv from the list
  single_file = (csv_list.size == 1)
  csv_list.peach(thread_count) do |csv_path|
    if opts[:ignore_parse_errors] && opts[:exceptions_file].nil? && opts[:rejections_file].nil?
      exc = nil
      rej = nil
      opts_file = opts
    else
      opts_file = opts.clone
      # priradit do opts i do exc -
      # temporary files to get the excepted records (if not given)
      exc = opts_file[:exceptions_file] = init_file(opts_file[:exceptions_file], 'exceptions', csv_path, single_file)
      rej = opts_file[:rejections_file] = init_file(opts_file[:rejections_file], 'rejections', csv_path, single_file)
    end

    # execute the load
    execute(GoodData::SQLGenerator.load_data(table_name, csv_path, columns, opts_file))

    exc.close if exc
    rej.close if rej

    # if there was something rejected and it shouldn't be ignored, raise an error
    if ((exc && File.size?(exc)) || (rej && File.size?(rej))) && (! opts[:ignore_parse_errors])
      fail ArgumentError, "Some lines in the CSV didn't go through. Exceptions: #{IO.read(exc)}\nRejected records: #{IO.read(rej)}"
    end
  end
end

#rename_table(old_name, new_name) ⇒ Object



53
54
55
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 53

def rename_table(old_name, new_name)
  execute(GoodData::SQLGenerator.rename_table(old_name, new_name))
end

#table_exists?(name) ⇒ Boolean

Returns:

  • (Boolean)


134
135
136
137
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 134

def table_exists?(name)
  count = execute_select(GoodData::SQLGenerator.get_table_count(name), :count => true)
  count > 0
end

#table_row_count(table_name) ⇒ Object



139
140
141
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 139

def table_row_count(table_name)
  execute_select(GoodData::SQLGenerator.get_row_count(table_name), :count => true)
end

#truncate_table(table_name) ⇒ Object



67
68
69
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 67

def truncate_table(table_name)
  execute(GoodData::SQLGenerator.truncate_table(table_name))
end