Class: Naginegi::BigQuery

Inherits:
Object
  • Object
show all
Defined in:
lib/naginegi/bigquery.rb

Constant Summary collapse

# ERB template for an Embulk configuration document.
#
# The `in:` section describes the source database and the extraction query;
# the `out:` section targets the `bigquery` output in `replace` mode, shipping
# gzip-compressed newline-delimited JSON.
#
# Every `<%= ... %>` placeholder is resolved by name against the binding passed
# to ERB, so the rendering method must define a local variable for each one.
# Placeholders whose local is nil render as an empty string.
#
# NOTE(review): `String#unindent` is not core Ruby; presumably it comes from an
# extension loaded elsewhere in the project -- confirm before changing the
# heredoc style.
CONTENTS =
<<-EOS.unindent
in:
  type: <%= db_type %>
  host: <%= host %>
  user: <%= user %>
  password: <%= password %>
  database: <%= database %>
  ssl: <%= ssl %>
  query: |
    <%= query %>
  <%= options %>
out:
  type: bigquery
  mode: replace
  auth_method: <%= auth_method %>
  json_keyfile: <%= json_keyfile %>
    <%= json_key_content %>
  project: <%= project %>
  service_account_email: <%= service_account_email %>
  dataset: <%= dataset %>
  table: <%= table_name %>
  schema_file: <%= schema_file %>
  auto_create_table: true
  path_prefix: <%= path_prefix %>
  source_format: NEWLINE_DELIMITED_JSON
  file_ext: .json.gz
  delete_from_local_when_job_end: 1
  formatter:
    type: jsonl
  encoders:
  - {type: gzip}
EOS

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(config) ⇒ BigQuery

Returns a new instance of BigQuery.



42
43
44
45
# File 'lib/naginegi/bigquery.rb', line 42

# Stores a private copy of the settings and records the construction date.
#
# @param config [#dup] settings object; a shallow copy (via +dup+) is kept, so
#   the instance does not hold on to the caller's own object
def initialize(config)
  settings = config.dup
  @config = settings
  # Captured once here; later date-dependent naming reuses this value.
  @current_date = Date.today
end

Class Method Details

.generate_schema(columns) ⇒ Object



47
48
49
50
# File 'lib/naginegi/bigquery.rb', line 47

# Renders column definitions as the text of a JSON array, one element per line.
#
# @param columns [Enumerable<#to_json>] objects that serialize themselves via +to_json+
# @return [String] "[", the serialized elements separated by ",\n", then "]",
#   each bracket on its own line and a trailing newline at the end
def self.generate_schema(columns)
  serialized = columns.map { |column| column.to_json }
  "[\n#{serialized.join(",\n")}\n]\n"
end

.generate_sql(table_config, columns) ⇒ Object



52
53
54
55
56
57
58
59
# File 'lib/naginegi/bigquery.rb', line 52

# Builds the SELECT statement used to extract one table.
#
# @param table_config [#name, #condition] supplies the table name and an
#   optional WHERE condition (omitted when +condition+ is nil/false)
# @param columns [Enumerable<#converted_value>] each element contributes one
#   select-list expression through +converted_value+
# @return [String] a single-line statement terminated by a newline
def self.generate_sql(table_config, columns)
  select_list = columns.map { |column| column.converted_value }.join(',')

  fragments = ["SELECT #{select_list}", " FROM #{table_config.name}"]
  fragments << " WHERE #{table_config.condition}" if table_config.condition
  fragments << "\n"
  fragments.join
end

Instance Method Details

#actual_table_name(table_name, daily_snapshot) ⇒ Object



111
112
113
114
# File 'lib/naginegi/bigquery.rb', line 111

# Resolves the destination table name, optionally suffixed with a date.
#
# @param table_name [String] base table name
# @param daily_snapshot [Boolean, nil] when truthy, the date stored in
#   +@current_date+ is appended as YYYYMMDD
# @return [String] +table_name+ itself, or +table_name+ followed by the date
def actual_table_name(table_name, daily_snapshot)
  if daily_snapshot
    date_suffix = @current_date.strftime('%Y%m%d')
    table_name + date_suffix
  else
    table_name
  end
end

#delete_table(dataset, table_name) ⇒ Object



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/naginegi/bigquery.rb', line 95

# Deletes a table through a Google::Cloud::Bigquery client.
#
# Credentials come from +@config['json_key']+ when present, otherwise from
# +@config['json_keyfile']+.
#
# @param dataset [String] dataset identifier, passed through unchanged
# @param table_name [String] table identifier, passed through unchanged
# @return [Object] whatever the client's +service.delete_table+ returns
def delete_table(dataset, table_name)
  inline_key = @config['json_key']
  credentials =
    if inline_key
      # Work on a copy so the stored config keeps its escaped form; literal
      # backslash-n sequences in the private key become real newlines.
      inline_key.dup.tap do |key|
        key['private_key'] = key['private_key'].gsub('\\n', "\n")
      end
    else
      @config['json_keyfile']
    end

  client = Google::Cloud::Bigquery.new(
    project: @config['project_id'],
    keyfile: credentials
  )
  client.service.delete_table(dataset, table_name)
end

#generate_embulk_config(db_name, db_config, table_config, columns) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/naginegi/bigquery.rb', line 61

# Renders the Embulk configuration text for exporting one table.
#
# The CONTENTS template is evaluated with this method's +binding+, so every
# local assigned below is looked up *by name* from the template. Locals that
# look unused here are therefore required, and renaming one silently breaks
# the matching placeholder.
#
# @param db_name [String] logical database name; used in the schema file path
#   and the local path prefix
# @param db_config [Hash] source settings read by string key: 'db_type',
#   'host', 'username', 'password', 'database', 'embulk_ssl_enable',
#   'timezone', 'bq_dataset', 'daily_snapshot'
# @param table_config [#name, #daily_snapshot] table settings (also handed to
#   +Naginegi::BigQuery.generate_sql+)
# @param columns [Enumerable] column objects forwarded to +generate_sql+
# @return [String] the rendered template
def generate_embulk_config(db_name, db_config, table_config, columns)
  db_type = db_config['db_type']
  host = db_config['host']
  user = db_config['username']
  password = db_config['password']
  database = db_config['database']
  ssl = db_config['embulk_ssl_enable'] || false
  # Extra JDBC options are emitted only for MySQL sources.
  options = if db_type == 'mysql'
              "options: {useLegacyDatetimeCode: false, serverTimezone: #{db_config['timezone']}}"
            else
              ''
            end
  query = Naginegi::BigQuery.generate_sql(table_config, columns)

  auth_method = @config['auth_method']
  # Exactly one of json_key_content / json_keyfile is assigned; the other
  # stays nil (Ruby defines both locals at parse time) and renders as ''.
  if @config['json_key']
    values = @config['json_key'].map do |k, v|
      # Keep each value on one line: real newlines become a literal "\n".
      value = v.gsub("\n", '\\n')
      "\"#{k}\": \"#{value}\""
    end
    json_key_content = "content: |\n      {#{values.join(',')}}"
  else
    json_keyfile = @config['json_keyfile']
  end
  project = @config['project_id']
  # The template placeholder is `service_account_email`; the config key is
  # 'service_email'.
  service_account_email = @config['service_email']
  dataset = db_config['bq_dataset']
  table_name = actual_table_name(table_config.name, db_config['daily_snapshot'] || table_config.daily_snapshot)
  schema_file = "#{@config['schema_dir']}/#{db_name}/#{table_config.name}.json"
  path_prefix = "/var/tmp/embulk_#{db_name}_#{table_config.name}"

  ERB.new(CONTENTS).result(binding)
end