Class: Zillabyte::Command::Data

Inherits:
Base
  • Object
show all
Defined in:
lib/zillabyte/cli/data.rb

Overview

manage custom datasets

Constant Summary collapse

MAX_POLL_SECONDS =
60 * 5
POLL_SLEEP =
1
APPENDS_ROWS_SLICE =
5_000
ONE_HUNDRED_MB =
1e8

Constants inherited from Base

Base::META_COLUMNS

Instance Attribute Summary

Attributes inherited from Base

#args, #options

Instance Method Summary collapse

Methods inherited from Base

#api, #initialize, namespace

Methods included from Helpers

#app, #ask, #command, #create_git_remote, #display, #error, #extract_app_from_git_config, #extract_app_in_dir, #format_with_bang, #friendly_dir, #get_flow_ui_link, #get_info, #get_rich_info, #git, #handle_downloading_manifest, #has_git?, #longest, #read_multiline, #truncate_message, #version_okay?, #with_tty

Constructor Details

This class inherits a constructor from Zillabyte::Command::Base

Instance Method Details

#append ⇒ Object

data:append ID FILE

Adds data to an existing dataset.

--filetype FILETYPE # Input File format type, defaults to csv
--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN



242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
# File 'lib/zillabyte/cli/data.rb', line 242

def append
  # Appends the rows of a local file to an existing dataset: looks up the
  # dataset's schema, asks the API for upload targets for the computed number
  # of shards, hands the file to save_rows, and reports how many rows were saved.

  dataset_id = options[:id] || shift_argument
  path = options[:file] || shift_argument
  output_type = options[:output_type]

  # Without an explicit --filetype, use the file's extension minus its dot.
  format = options[:filetype] || File.extname(path || "").gsub(".", "")
  error("no id given", output_type) if dataset_id.nil?
  error("no file given", output_type) if path.nil?

  columns = self.api.data.get(dataset_id, options)["schema"]
  shard_count = calculate_number_of_shards(path)

  display("uploading content.", false)
  response = self.api.data.append(dataset_id, {:shards => shard_count})

  # An API-level failure is reported through the shared error helper.
  return error(response['error_message'].to_s, output_type) if response['error']

  appended = save_rows(columns, path, format, response['uris'], output_type)
  summary =
    if output_type == "json"
      {:rows => appended}.to_json
    else
      "dataset ##{dataset_id} appended #{appended} rows"
    end
  display summary
end

#authorize ⇒ Object

data:authorize [ID] [SCOPE]

changes permission on the dataset

--id ID # The dataset id
--public # Makes the dataset public (default)
--private # Makes the dataset private



179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# File 'lib/zillabyte/cli/data.rb', line 179

def authorize
  # Changes the access scope of a dataset by POSTing the requested scope to
  # /relations/<id>/authorizations. The scope defaults to "public"; --private
  # forces it to "private". --public and --private together are rejected.

  id = options[:id] || shift_argument
  scope = options[:scope] || shift_argument || "public"
  # `type` was previously used below without ever being assigned, so a missing
  # id raised NameError instead of printing the intended message.
  type = options[:output_type]
  make_public = options[:public]
  make_private = options[:private]

  error("no id given", type) if id.nil?
  error("both --public and --private cannot be given", type) if make_public && make_private
  scope = "private" if make_private

  # The response body is not used; `:expects => 200` is passed so the API
  # client can enforce the status (presumably by raising — confirm).
  self.api.request(
    :expects  => 200,
    :method   => :post,
    :path     => "/relations/#{CGI.escape(id)}/authorizations",
    :body     => {:scope => scope}.to_json
  )

  display "Authorization updated"
end

#create ⇒ Object

data:create NAME

Creates a new dataset.

--schema SCHEMA # Column names and types in the format "field_1:output_type_1,field_2:output_type_2,…"
--public SCOPE # Make the dataset public
--file FILE # A data file
--filetype FILETYPE # File format type, defaults to csv
--description DESCRIPTION # Description of dataset contents
--aliases ALIASES # Dataset name aliases in the format "alias_1,alias_2,…"
--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/zillabyte/cli/data.rb', line 128

def create
  # Creates a dataset named NAME from the schema/visibility/description/alias
  # options. When --file is given, the file is additionally sharded and its
  # rows are uploaded to the targets returned by the API.

  dataset_name = options[:name] || shift_argument
  path = options[:file] || nil
  format = options[:filetype] || nil
  output_type = options[:output_type]

  error("no name given", output_type) if dataset_name.nil?

  properties = get_dataset_properties(
    options[:schema] || nil,
    options[:public] || false,
    options[:description] || nil,
    options[:aliases] || nil
  )

  unless path.nil?
    # Without an explicit --filetype, use the file's extension minus its dot.
    format ||= File.extname(path).gsub(".", "")
    shard_count = calculate_number_of_shards(path)
    properties[:shards] = shard_count
    if output_type.nil?
      display "Sharding into ~#{shard_count} shard(s) for upload. (Actual number may be smaller.)"
    end
  end

  response = api.data.create(dataset_name, properties)
  return error(response['error_message'].to_s, output_type) if response['error']

  # Rows are only uploaded when a file was supplied; otherwise the size is 0.
  uploaded = nil
  uploaded = save_rows(properties[:schema], path, format, response['uris'], output_type) unless path.nil?

  return display("{}") if output_type == "json"

  display "dataset ##{response['id']} #{response['action']}. size: #{uploaded || 0} rows."
end

#delete ⇒ Object

data:delete ID

Deletes a dataset.

-f, --force # Delete without asking for confirmation
--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/zillabyte/cli/data.rb', line 76

def delete
  # Deletes a dataset. Unless --force is given the user must type "yes" at an
  # interactive prompt; when an output type is set (non-interactive use),
  # --force is mandatory.
  id = options[:id] || shift_argument
  forced = options[:force]
  type = options[:output_type] || nil

  # Validate the id up front, like the sibling commands do. Previously a
  # missing id went through the confirmation prompt and then reached the API
  # as a delete of `nil`.
  error("no id given", type) if id.nil?

  # Declared outside the loop block so the answer is visible after it.
  confirm = nil
  unless forced
    error("specify -f, --force to confirm deletion", type) unless type.nil?

    # Keep asking until the answer is exactly "yes" or "no".
    loop do
      display "This operation cannot be undone. Are you sure you want to delete this dataset? (yes/no):", false
      confirm = ask
      break if confirm == "yes" || confirm == "no"
      display "Please enter 'yes' to delete the dataset or 'no' to exit"
    end
  end

  return unless forced || confirm == "yes"

  res = api.data.delete(id, options)

  if res['error']
    error(res['error'], type)
  elsif type == "json"
    display "{}"
  else
    display res["body"]
  end
end

#index ⇒ Object

data

Lists your custom datasets.

--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN



18
19
20
# File 'lib/zillabyte/cli/data.rb', line 18

def index
  # Delegates to #list and returns whatever it returns.
  list
end

#list ⇒ Object

data

Lists your custom datasets.

--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/zillabyte/cli/data.rb', line 30

def list
  # Lists the datasets returned by GET /relations as a table with the columns
  # id, name and rows.
  type = options[:output_type] || nil

  response = api.request(
    :expects  => 200,
    :method   => :get,
    :path     => "/relations"
  )

  # Loaded once here instead of once per row inside the loop.
  require("zillabyte/cli/helpers/table_output_builder")

  headings = ["id", "name", "rows"]
  rows = response.body.map do |row|
    # Build each table row in heading order. Previously the cells followed the
    # key order of the API hash, so a response with keys in a different order
    # put values under the wrong headings. The old per-row "columns"/"aliases"
    # joins were discarded before display and are dropped.
    headings.map do |heading|
      value = row[heading]
      if heading == "rows" && row.key?("rows")
        TableOutputBuilder.format_row_count(value)
      else
        value
      end
    end
  end

  display "datasets\n" if type.nil? && rows.size > 0
  display TableOutputBuilder.build_table(headings, rows, type)
  display "Total number of datasets: #{rows.length}" if type.nil?
end

#pull ⇒ Object

data:pull ID OUTPUT

Pulls dataset into OUTPUT.gz.

--version_id [version_id] # Retrieve data generated with a specific version of the app if the dataset is associated with an app [default: last version]
--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN



286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
# File 'lib/zillabyte/cli/data.rb', line 286

def pull
  # Requests a download manifest for the dataset from the API and passes it
  # to handle_downloading_manifest together with the target file name.

  dataset_id = options[:id] || shift_argument
  target = options[:file] || shift_argument
  output_type = options[:output_type]
  error("no id given", output_type) if dataset_id.nil?
  error("no file given", output_type) if target.nil?

  manifest = self.api.data.pull(dataset_id, options)
  handle_downloading_manifest(target, manifest, output_type)

  message = output_type == "json" ? "{}" : "finished pulling dataset ##{dataset_id} to file #{target}"
  display message
end

#pull_to_s3 ⇒ Object

data:pull:s3 ID S3_PATH

Pulls dataset to s3_bucket/s3_key/part***.gz using the given s3_access and s3_secret credentials. S3_PATH may be given in the following forms:

1) s3://s3_access:s3_secret@s3_bucket/s3_key
2) s3://s3_bucket/s3_key: also supply --s3_access and --s3_secret OR set the environment variables S3_ACCESS and S3_SECRET
3) s3_key: also supply --s3_access, --s3_secret and --s3_bucket OR set the environment variables S3_ACCESS and S3_SECRET and supply --s3_bucket

--version_id [version_id] # Retrieve data generated with a specific version of the app if the dataset is associated with an app [default: last version]
--s3_access [s3_access_key] # S3 access key
--s3_secret [s3_secret_key] # S3 secret key
--s3_bucket [s3_bucket] # S3 bucket to store data at
--s3_key [s3_file_key] # S3 key to store data at
--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN



332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
# File 'lib/zillabyte/cli/data.rb', line 332

def pull_to_s3
  # Asks the API to export a dataset to S3. S3_PATH is accepted as
  #   1) s3://ACCESS:SECRET@bucket/key
  #   2) s3://bucket/key
  #   3) key
  # Credentials and bucket missing from the path are taken from
  # --s3_access / --s3_secret / --s3_bucket, with S3_ACCESS / S3_SECRET in the
  # environment as a further fallback for the credentials.

  id = options[:id] || shift_argument
  type = options[:output_type]
  error("no id given", type) if id.nil?

  s3_path = options[:s3_path] || shift_argument
  error("no s3 path given", type) if s3_path.nil?

  s3_access = s3_secret = s3_bucket = s3_key = nil

  # Bucket names may contain hyphens and keys may contain dots; the previous
  # patterns rejected both, so e.g. "s3://my-bucket/key" was not recognised
  # and the whole URL was sent as the key. \A/\z anchor the whole string
  # (^/$ only anchor a line).
  with_credentials = /\As3:\/\/([A-Z0-9]{20}):([$-\/:-?{-~!"^_`\[\]\w]{40})@([-\w.]*)\/([-\w\/.]*)\z/
  bucket_and_key = /\As3:\/\/([-\w.]*)\/([-\w\/.]*)\z/

  if (matches = s3_path.match(with_credentials))
    s3_access, s3_secret, s3_bucket, s3_key = matches.captures
  elsif (matches = s3_path.match(bucket_and_key))
    s3_bucket, s3_key = matches.captures
  else
    # Form 3: the whole argument is the key.
    s3_key = s3_path
  end

  # NOTE(review): options[:s3_key] is never consulted here; the key always
  # comes from S3_PATH — confirm that is intended.
  s3_access = options[:s3_access] || ENV["S3_ACCESS"] if s3_access.nil? || s3_access == ""
  s3_secret = options[:s3_secret] || ENV["S3_SECRET"] if s3_secret.nil? || s3_secret == ""
  s3_bucket = options[:s3_bucket] if s3_bucket.nil? || s3_bucket == ""

  error("No s3 access key or invalid access key provided. Please check that you have entered the access key correctly.", type) if s3_access.nil?
  error("No s3 access secret key or invalid secret key provided. Please check that you have entered the secret key correctly.", type) if s3_secret.nil?
  error("No s3 access bucket or invalid bucket provided. Please check that you have entered the bucket correctly.", type) if s3_bucket.nil?
  error("No s3 file key provided. Please check that you have entered the file key correctly.", type) if s3_key.nil?

  s3_params = {:s3_access_key => s3_access, :s3_secret => s3_secret,
               :s3_bucket => s3_bucket, :s3_file_key => s3_key}
  s3_params[:version_id] = options[:version_id] if options[:version_id]

  res = self.api.data.pull_to_s3(id, s3_params)

  if type == "json"
    display "{}"
  else
    display "downloading dataset to s3://#{res["s3_bucket"]}/#{res["s3_file_key"]}/"
    display "if the dataset is large, this may take a while, please check your s3 account after a few minutes"
  end
end

#readme ⇒ Object

data:readme ID FILE

Attaches a README file to a dataset

#HIDDEN



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# File 'lib/zillabyte/cli/data.rb', line 213

def readme
  # Attaches a README to a dataset by POSTing the file's name and contents to
  # /relations/<id>/readme.

  id = options[:id] || shift_argument
  file = options[:file] || shift_argument
  # `type` was previously used below without ever being assigned, so a missing
  # id or file raised NameError instead of printing the intended message.
  type = options[:output_type]
  error("no id given", type) if id.nil?
  error("no file given", type) if file.nil?
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  error("file doesn't exist", type) unless File.exist?(file)
  content = File.read(file)

  # The response body is not used.
  self.api.request(
    :expects  => 200,
    :method   => :post,
    :path     => "/relations/#{CGI.escape(id)}/readme",
    :body     => {:filename => file, :content => content}.to_json
  )

  display "README updated"
end

#show ⇒ Object

data:show ID

Shows a sample of the dataset.

--version_id [version_id] # Retrieve data generated with a specific version of the app if the dataset is associated with an app [default: last version]
--no_truncation # Don't truncate long strings
--meta # Show metadata columns (since, confidence, source)
--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN



390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
# File 'lib/zillabyte/cli/data.rb', line 390

def show
  # Shows a sample of a dataset: submits a sample request, polls the resulting
  # job until it completes (for at most MAX_POLL_SECONDS), then renders the
  # returned rows as a table.
  name = options[:name] || shift_argument
  type = options[:output_type]
  show_meta = options[:meta] || false
  error("no id given", type) if name.nil?

  # Initial request..
  res = self.api.data.show(name, :post, options)

  if res['job_id']
    # The job id travels with every follow-up poll request via `options`.
    options[:job_id] = res['job_id']

    start = Time.now.utc
    completed = false

    display "Fetching your data, please wait..." if type.nil?

    # Poll until the results are ready or the time budget is used up.
    while Time.now.utc < start + MAX_POLL_SECONDS
      res = self.api.data.show(name, :get, options)

      case res['status']
      when 'completed'
        # `raise`, not `throw`: throw is for catch/throw control flow and with
        # a String only produced an "uncaught throw" error.
        raise "something is wrong: #{res}" unless res['return']
        res = res['return']
        completed = true
        break
      when 'running'
        sleep(POLL_SLEEP)
      else
        raise "unknown status: #{res}"
      end
    end

    # Previously a timeout fell through with the last "running" response and
    # was reported as "empty dataset".
    unless completed
      error("timed out after #{MAX_POLL_SECONDS} seconds waiting for the dataset sample", type)
      return
    end
  else
    if res['error']
      error(res['error_message'] || res['error'], type)
    else
      error("remote server error (r256)", type)
    end
  end

  # We only reach here after polling is complete...
  if res["rows"] && res["rows"].size > 0
    # Column keys come from the first row; "id" is always hidden and the meta
    # columns are hidden unless --meta was given.
    concrete_headings = res["rows"].first.keys
    concrete_headings.delete("id")
    META_COLUMNS.each { |c| concrete_headings.delete c } unless show_meta

    # Display every alias registered for a column, or the column's own name
    # when it has none.
    aliases = res['column_aliases'] || []
    headings = concrete_headings.flat_map do |ch|
      names = aliases.select { |al| al["concrete_name"] == ch }.map { |al| al["alias"] }
      names.empty? ? [ch] : names
    end

    # Long values are shortened for terminal output unless --no_truncation is
    # set; json output is never truncated.
    truncate = !options[:no_truncation] && type != "json"
    rows = res["rows"].map do |obj|
      concrete_headings.map do |heading|
        value = obj[heading]
        if truncate && value.to_s.size > 30
          value.to_s[0..30] + "..."
        else
          value
        end
      end
    end

    require("colorize")
    require("zillabyte/cli/helpers/table_output_builder")
    if type.nil?
      display "Sampled output:"
      display TableOutputBuilder.build_terminal_table(headings, rows)
      display "To download your full dataset, type "+"`zillabyte data:pull [RELATION_NAME] [OUTPUT_PREFIX] [DIRECTORY]`".colorize(:green)+"."
    else
      display TableOutputBuilder.build_table(headings, rows, type)
    end
  else
    if type == "json"
      display "{}"
    else
      display "empty dataset"
    end
  end
end