Class: GoodData::Datawarehouse

Inherits:
Object
  • Object
show all
Defined in:
lib/gooddata_datawarehouse/version.rb,
lib/gooddata_datawarehouse/datawarehouse.rb

Constant Summary collapse

VERSION =
"0.0.9"
PARALEL_COPY_THREAD_COUNT =
10

Instance Method Summary collapse

Constructor Details

#initialize(username, password, instance_id, opts = {}) ⇒ Datawarehouse

Returns a new instance of Datawarehouse.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 13

def initialize(username, password, instance_id, opts={})
  @logger = Logger.new(STDOUT)
  @username = username
  @password = password
  @jdbc_url = opts[:jdbc_url] || "jdbc:gdc:datawarehouse://secure.gooddata.com/gdc/datawarehouse/instances/#{instance_id}"
  if @username.nil? || @password.nil?
    fail ArgumentError, "username and/or password are nil. All of them are mandatory."
  end

  if instance_id.nil? && opts[:jdbc_url].nil?
    fail ArgumentError, "you must either provide instance_id or jdbc_url option."
  end

  Jdbc::DSS.load_driver
  Java.com.gooddata.dss.jdbc.driver.DssDriver
end

Instance Method Details

#connectObject



194
195
196
197
198
199
200
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 194

def connect
  Sequel.connect @jdbc_url,
    :username => @username,
    :password => @password do |connection|
      yield(connection)
  end
end

#create_table(name, columns, opts = {}) ⇒ Object



132
133
134
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 132

def create_table(name, columns, opts={})
  execute(GoodData::SQLGenerator.create_table(name, columns, opts))
end

#create_table_from_csv_header(table_name, csv_path, opts = {}) ⇒ Object

returns a list of columns created does nothing if file empty, returns []



125
126
127
128
129
130
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 125

def create_table_from_csv_header(table_name, csv_path, opts={})
  # take the header as a list of columns
  columns = get_csv_headers(csv_path)
  create_table(table_name, columns, opts) unless columns.empty?
  columns
end

#csv_to_new_table(table_name, csvs, opts = {}) ⇒ Object



61
62
63
64
65
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 61

def csv_to_new_table(table_name, csvs, opts={})
  csv_list = list_files(csvs)
  cols = create_table_from_csv_header(table_name, csv_list[0], opts)
  load_data_from_csv(table_name, csv_list, opts.merge(columns: cols, append: true))
end

#drop_table(table_name, opts = {}) ⇒ Object



57
58
59
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 57

def drop_table(table_name, opts={})
  execute(GoodData::SQLGenerator.drop_table(table_name,opts))
end

#execute(sql_strings) ⇒ Object

execute sql, return nothing



150
151
152
153
154
155
156
157
158
159
160
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 150

def execute(sql_strings)
  if ! sql_strings.kind_of?(Array)
    sql_strings = [sql_strings]
  end
  connect do |connection|
    sql_strings.each do |sql|
      @logger.info("Executing sql: #{sql}") if @logger
      connection.run(sql)
    end
  end
end

#execute_select(sql, opts = {}) ⇒ Object

executes sql (select), for each row, passes execution to block



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 163

def execute_select(sql, opts={})
  fetch_handler = opts[:fetch_handler]
  count = opts[:count]

  connect do |connection|
    # do the query
    f = connection.fetch(sql)

    @logger.info("Executing sql: #{sql}") if @logger
    # if handler was passed call it
    if fetch_handler
      fetch_handler.call(f)
    end

    if count
      return f.first[:count]
    end

    # if block given yield to process line by line
    if block_given?
      # go through the rows returned and call the block
      return f.each do |row|
        yield(row)
      end
    end

    # return it all at once
    f.map{|h| h}
  end
end

#export_table(table_name, csv_path) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 30

def export_table(table_name, csv_path)
  CSV.open(csv_path, 'wb', :force_quotes => true) do |csv|
    # get the names of cols
    cols = get_columns(table_name).map {|c| c[:column_name]}

    # write header
    csv << cols

    # get the keys for columns, stupid sequel
    col_keys = nil
    rows = execute_select(GoodData::SQLGenerator.select_all(table_name, limit: 1))

    col_keys = rows[0].keys

    execute_select(GoodData::SQLGenerator.select_all(table_name)) do |row|
      # go through the table write to csv
      csv << row.values_at(*col_keys)
    end
  end
  @logger.info "Table #{table_name} exported to #{csv_path.respond_to?(:path)? csv_path.path : csv_path}"
  csv_path
end

#get_columns(table_name) ⇒ Object



145
146
147
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 145

def get_columns(table_name)
  res = execute_select(GoodData::SQLGenerator.get_columns(table_name))
end

#init_file(given_filename, key, csv_path, single_file) ⇒ Object



112
113
114
115
116
117
118
119
120
121
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 112

def init_file(given_filename, key, csv_path, single_file)
  # only use file postfix if there are multiple files
  postfix = single_file ? '' : "-#{File.basename(csv_path)}"

  # take what we have and put the source csv name at the end
  given_filename = given_filename.path if given_filename.is_a?(File)
  f = "#{given_filename || Tempfile.new(key).path}#{postfix}"
  f = File.new(f, 'w') unless f.is_a?(File)
  f
end

#load_data_from_csv(table_name, csvs, opts = {}) ⇒ Object



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 71

def load_data_from_csv(table_name, csvs, opts={})
  thread_count = opts[:paralel_copy_thread_count] || PARALEL_COPY_THREAD_COUNT
  # get the list of files to load and columns in the csv
  csv_list = list_files(csvs)
  columns = opts[:columns] || get_csv_headers(csv_list[0])

  # truncate_table unless data should be appended
  unless opts[:append]
    truncate_table(table_name)
  end

  # load each csv from the list
  single_file = (csv_list.size == 1)
  csv_list.each do |csv_path|
    begin
      if opts[:ignore_parse_errors] && opts[:exceptions_file].nil? && opts[:rejections_file].nil?
        exc = nil
        rej = nil
        opts_file = opts
      else
        opts_file = opts.clone
        # priradit do opts i do exc -
        # temporary files to get the excepted records (if not given)
        exc = opts_file[:exceptions_file] = init_file(opts_file[:exceptions_file], 'exceptions', csv_path, single_file)
        rej = opts_file[:rejections_file] = init_file(opts_file[:rejections_file], 'rejections', csv_path, single_file)
      end

      # execute the load
      execute(GoodData::SQLGenerator.load_data(table_name, csv_path, columns, opts_file))

      # if there was something rejected and it shouldn't be ignored, raise an error
      if ((exc && File.size?(exc)) || (rej && File.size?(rej))) && (! opts[:ignore_parse_errors])
        fail ArgumentError, "Some lines in the CSV didn't go through. Exceptions: #{IO.read(exc)}\nRejected records: #{IO.read(rej)}"
      end
    ensure
      exc.close if exc
      rej.close if rej
    end
  end
end

#rename_table(old_name, new_name) ⇒ Object



53
54
55
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 53

def rename_table(old_name, new_name)
  execute(GoodData::SQLGenerator.rename_table(old_name, new_name))
end

#table_exists?(name) ⇒ Boolean

Returns:

  • (Boolean)


136
137
138
139
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 136

def table_exists?(name)
  count = execute_select(GoodData::SQLGenerator.get_table_count(name), :count => true)
  count > 0
end

#table_row_count(table_name) ⇒ Object



141
142
143
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 141

def table_row_count(table_name)
  execute_select(GoodData::SQLGenerator.get_row_count(table_name), :count => true)
end

#truncate_table(table_name) ⇒ Object



67
68
69
# File 'lib/gooddata_datawarehouse/datawarehouse.rb', line 67

def truncate_table(table_name)
  execute(GoodData::SQLGenerator.truncate_table(table_name))
end