Class: Zillabyte::Command::Data

Inherits:

Base

Object
Base
Zillabyte::Command::Data

show all

Defined in:: lib/zillabyte/cli/data.rb

Overview

manage custom datasets

Constant Summary collapse

MAX_POLL_SECONDS =

60 * 5

POLL_SLEEP =

APPENDS_ROWS_SLICE =

5_000

Constants inherited from Base

Base::META_COLUMNS

Instance Attribute Summary

Attributes inherited from Base

#args, #options

Instance Method Summary collapse

#append ⇒ Object

data:append ID FILE.
#authorize ⇒ Object

data:authorize [ID] [SCOPE].
#create ⇒ Object

data:create NAME.
#delete ⇒ Object

data:delete ID.
#index ⇒ Object

data.
#list ⇒ Object

data Lists your custom datasets.
#pull ⇒ Object

data:pull ID OUTPUT.
#pull_to_s3 ⇒ Object

data:pull:s3 ID S3_PATH.
#readme ⇒ Object

data:readme ID FILE.
#show ⇒ Object

data:show ID.

Methods inherited from Base

#api, #initialize, namespace

Methods included from Helpers

#app, #ask, #command, #create_git_remote, #display, #error, #extract_app_from_git_config, #extract_app_in_dir, #format_with_bang, #friendly_dir, #get_flow_ui_link, #get_info, #get_rich_info, #git, #handle_downloading_manifest, #has_git?, #longest, #read_multiline, #truncate_message, #with_tty

Constructor Details

This class inherits a constructor from Zillabyte::Command::Base

Instance Method Details

#append ⇒ `Object`

data:append ID FILE

Adds data to an existing dataset.

--filetype FILETYPE        # Input file format type, defaults to csv
--output_type OUTPUT_TYPE  # Specify an output type i.e. json #HIDDEN

# File 'lib/zillabyte/cli/data.rb', line 246

def append
  # data:append ID FILE
  #
  # Uploads the rows of FILE to an existing dataset in slices of
  # APPENDS_ROWS_SLICE rows and reports how many rows the server accepted.
  # ID and FILE come from options[:id] / options[:file], falling back to the
  # next positional arguments (in that order).
  dataset_id  = options[:id] || shift_argument
  source_path = options[:file] || shift_argument
  output_type = options[:output_type]

  # An explicit --filetype wins; otherwise use the file extension without dots.
  format = options[:filetype] || File.extname(source_path || "").gsub(".", "")

  error("no id given", output_type) if dataset_id.nil?
  error("no file given", output_type) if source_path.nil?

  # Describe the dataset's columns as one {index => type} hash per column for
  # the file sanity check.
  remote = api.data.get(dataset_id, options)
  column_specs = remote["columns"].map { |column| { column["index"] => column["type"] } }
  pending_rows = sanity_check_file(source_path, format, { "columns" => column_specs }, output_type)

  uploaded = 0
  display("uploading content.", false)

  pending_rows.each_slice(APPENDS_ROWS_SLICE) do |batch|
    # TODO: post to direct signed s3 (http://docs.aws.amazon.com/AWSRubySDK/latest/AWS/S3/PresignedPost.html)
    display(".", false)
    require("base64")
    payload = Base64.encode64(gzip(batch.to_json))
    reply = api.data.append(dataset_id, { :gzip_rows => payload })

    # A reply without "size" stops the upload; rows counted so far are still reported.
    accepted = reply["size"]
    break unless accepted

    uploaded += accepted
  end

  summary =
    if output_type == "json"
      { :rows => uploaded }.to_json
    else
      "dataset ##{dataset_id} appended #{uploaded} rows"
    end
  display(summary)
end

#authorize ⇒ `Object`

data:authorize [ID] [SCOPE]

changes permission on the dataset

--id ID    # The dataset id
--public   # Makes the dataset public (default)
--private  # Makes the dataset private

# File 'lib/zillabyte/cli/data.rb', line 183

def authorize
  # data:authorize [ID] [SCOPE]
  #
  # Changes the permission scope of a dataset by POSTing to
  # /relations/<id>/authorizations.
  #
  # ID and SCOPE come from options[:id] / options[:scope], falling back to the
  # next positional arguments; SCOPE defaults to "public". The --public and
  # --private flags are mutually exclusive and, when given, take precedence
  # over a positional SCOPE.
  id = options[:id] || shift_argument
  scope = options[:scope] || shift_argument || "public"
  # `type` was previously referenced without ever being assigned, so the
  # validation paths below raised NameError instead of reporting the problem.
  type = options[:output_type]
  make_public = options[:public]
  make_private = options[:private]

  error("no id given", type) if id.nil?
  error("both --public and --private cannot be given", type) if make_public && make_private

  # An explicit flag overrides whatever scope was supplied positionally.
  scope = "private" if make_private
  scope = "public" if make_public

  api.request(
    :expects  => 200,
    :method   => :post,
    :path     => "/relations/#{CGI.escape(id)}/authorizations",
    :body     => {:scope => scope}.to_json
  )

  display "Authorization updated"
end

#create ⇒ `Object`

data:create NAME

Creates a new dataset.

--schema SCHEMA            # Column names and types in the format "field_1:output_type_1,field_2:output_type_2,..."
--public SCOPE             # Make the dataset public
--file FILE                # A data file
--filetype FILETYPE        # File format type, defaults to csv
--description DESCRIPTION  # Description of dataset contents
--aliases ALIASES          # Dataset name aliases in the format "alias_1,alias_2,..."
--output_type OUTPUT_TYPE  # Specify an output type i.e. json #HIDDEN

# File 'lib/zillabyte/cli/data.rb', line 127

def create
  # data:create NAME
  #
  # Creates a dataset called NAME. The dataset properties are built by
  # get_dataset_properties when no output type is set, or by
  # hash_dataset_properties when one is. If --file is given, its checked rows
  # are sent along with the create request.
  output_type = options[:output_type]
  dataset_name = options[:name] || shift_argument
  error("no name given", output_type) if dataset_name.nil?

  # `|| nil` / `|| false` normalise falsy option values exactly as before.
  source_file = options[:file] || nil
  format      = options[:filetype] || nil
  schema      = options[:schema] || nil
  is_public   = options[:public] || false
  description = options[:description] || nil
  aliases     = options[:aliases] || nil

  properties =
    if output_type.nil?
      get_dataset_properties(schema, is_public, description, aliases)
    else
      hash_dataset_properties(schema, is_public, description, aliases, output_type)
    end

  if source_file
    # Fall back to the file extension (dots removed) when no --filetype was given.
    format ||= File.extname(source_file).gsub(".", "")
    properties[:rows] =
      sanity_check_file(source_file, format, { "columns" => properties[:schema] }, output_type)
  end

  reply = api.data.create(dataset_name, properties)
  return error(reply['error_message'].to_s, output_type) if reply['error']

  if output_type == "json"
    display "{}"
  else
    display "dataset ##{reply['id']} #{reply['action']}. size: #{reply['size'] || 0} rows."
  end
end

#delete ⇒ `Object`

data:delete ID

Deletes a dataset.

-f, --force                # Delete without asking for confirmation
--output_type OUTPUT_TYPE  # Specify an output type i.e. json #HIDDEN

# File 'lib/zillabyte/cli/data.rb', line 75

def delete
  # data:delete ID
  #
  # Deletes a dataset. Without -f/--force the user is prompted until they
  # answer exactly "yes" or "no"; when an output type is set, --force is
  # required because prompting is not possible.
  id = options[:id] || shift_argument
  forced = options[:force]
  type = options[:output_type] || nil

  # Every sibling command rejects a missing id up front; without this guard a
  # nil id went through the confirmation prompt and on to the API call.
  error("no id given", type) if id.nil?

  # Declared outside the loop so the answer is visible after the block ends.
  confirm = nil
  unless forced
    error("specify -f, --force to confirm deletion", type) unless type.nil?

    loop do
      display "This operation cannot be undone. Are you sure you want to delete this dataset? (yes/no):", false
      confirm = ask
      break if confirm == "yes" || confirm == "no"
      display "Please enter 'yes' to delete the dataset or 'no' to exit"
    end
  end

  return unless forced || confirm == "yes"

  res = api.data.delete(id, options)
  if res['error']
    error(res['error'], type)
  elsif type == "json"
    display "{}"
  else
    display res["body"]
  end
end

#index ⇒ `Object`

data

Lists your custom datasets.

--output_type OUTPUT_TYPE  # Specify an output type i.e. json #HIDDEN



17
18
19

# File 'lib/zillabyte/cli/data.rb', line 17

def index
  # `data` with no subcommand behaves exactly like `data:list`.
  list
end

#list ⇒ `Object`

data

Lists your custom datasets.

--output_type OUTPUT_TYPE  # Specify an output type i.e. json #HIDDEN

# File 'lib/zillabyte/cli/data.rb', line 29

def list
  # data
  #
  # Lists the datasets returned by GET /relations as a table with the columns
  # id, name and rows. Without an output type a "datasets" banner and a total
  # count are printed around the table.
  require("zillabyte/cli/helpers/table_output_builder")

  type = options[:output_type] || nil

  response = api.request(
    :expects  => 200,
    :method   => :get,
    :path     => "/relations"
  )

  headings = ["id", "name", "rows"]

  # Build every table row in heading order. The previous implementation kept
  # the key order of the response hash, so a response ordered differently from
  # `headings` produced values under the wrong column. It also joined the
  # "columns"/"aliases" entries only to discard them, which raised on rows
  # lacking those keys.
  rows = response.body.map do |row|
    headings.map do |col|
      val = row[col]
      # Only present row counts are formatted, matching the old behavior of
      # never passing a missing count to the formatter.
      val = TableOutputBuilder.format_row_count(val) if col == "rows" && row.key?(col)
      val
    end
  end

  display "datasets\n" if type.nil? && rows.size > 0
  display TableOutputBuilder.build_table(headings, rows, type)
  display "Total number of datasets: #{rows.length}" if type.nil?
end

#pull ⇒ `Object`

data:pull ID OUTPUT

Pulls dataset into OUTPUT.gz.

--cycle_id [cycle_id]      # Retrieve data generated during specified cycle if dataset is associated with an app [default: last cycle]
--output_type OUTPUT_TYPE  # Specify an output type i.e. json #HIDDEN

# File 'lib/zillabyte/cli/data.rb', line 297

def pull
  # data:pull ID OUTPUT
  #
  # Requests the dataset from the API and hands the response to
  # handle_downloading_manifest together with the target file name, which
  # always ends in ".gz".
  dataset_id  = options[:id] || shift_argument
  target      = options[:file] || shift_argument
  output_type = options[:output_type]

  error("no id given", output_type) if dataset_id.nil?
  error("no file given", output_type) if target.nil?

  # Append the .gz extension only when it is not already there.
  target = File.extname(target) == ".gz" ? target : "#{target}.gz"

  manifest = api.data.pull(dataset_id, options)
  handle_downloading_manifest(target, manifest, output_type)

  message = output_type == "json" ? "{}" : "finished pulling dataset ##{dataset_id} to file"
  display message
end

#pull_to_s3 ⇒ `Object`

data:pull:s3 ID S3_PATH

Pulls dataset to s3_bucket/s3_key/part***.gz using the given s3_access and s3_secret credentials. S3_PATH may be given in the following forms:

1) s3://s3_access:s3_secret@s3_bucket/s3_key
2) s3://s3_bucket/s3_key: also supply --s3_access and --s3_secret OR set the environment variables S3_ACCESS and S3_SECRET
3) s3_key: also supply --s3_access, --s3_secret and --s3_bucket OR set the environment variables S3_ACCESS and S3_SECRET and supply --s3_bucket

--cycle_id [cycle_id]        # Retrieve data generated during specified cycle if dataset is associated with an app [default: last cycle]
--s3_access [s3_access_key]  # S3 access key
--s3_secret [s3_secret_key]  # S3 secret key
--s3_bucket [s3_bucket]      # S3 bucket to store data at
--s3_key [s3_file_key]       # S3 key to store data at
--output_type OUTPUT_TYPE    # Specify an output type i.e. json #HIDDEN

# File 'lib/zillabyte/cli/data.rb', line 345

def pull_to_s3
  # data:pull:s3 ID S3_PATH
  #
  # Asks the API to export a dataset to S3. S3_PATH is accepted as:
  #   1) s3://access:secret@bucket/key
  #   2) s3://bucket/key   (credentials from --s3_access/--s3_secret or the
  #                         S3_ACCESS/S3_SECRET environment variables)
  #   3) key               (bucket from --s3_bucket, credentials as in 2)
  # Anything not matching form 1 or 2 is treated as a bare key.
  id = options[:id] || shift_argument
  type = options[:output_type]
  error("no id given", type) if id.nil?

  s3_path = options[:s3_path] || shift_argument
  error("no s3 path given", type) if s3_path.nil?

  # \A/\z anchor the whole string (^/$ only anchor lines), and the bucket
  # class now admits "-": hyphenated bucket names previously failed both
  # patterns, so the entire URL was sent as the file key.
  matches = s3_path.match(/\As3:\/\/([A-Z0-9]{20}):([$-\/:-?{-~!"^_`\[\]\w]{40})@([-\w\.]*)\/([-\w\/]*)\z/)
  if matches
    s3_access, s3_secret, s3_bucket, s3_key = matches.captures
  else
    matches = s3_path.match(/\As3:\/\/([-\w\.]*)\/([-\w\/]*)\z/)
    if matches
      s3_bucket, s3_key = matches.captures
    else
      s3_key = s3_path
    end
  end

  # Fill in whatever the path did not provide (or provided as empty).
  s3_access = options[:s3_access] || ENV["S3_ACCESS"] if s3_access.nil? || s3_access == ""
  s3_secret = options[:s3_secret] || ENV["S3_SECRET"] if s3_secret.nil? || s3_secret == ""
  s3_bucket = options[:s3_bucket] if s3_bucket.nil? || s3_bucket == ""

  error("No s3 access key or invalid access key provided. Please check that you have entered the access key correctly.", type) if s3_access.nil?
  error("No s3 access secret key or invalid secret key provided. Please check that you have entered the secret key correctly.", type) if s3_secret.nil?
  error("No s3 access bucket or invalid bucket provided. Please check that you have entered the bucket correctly.", type) if s3_bucket.nil?
  error("No s3 file key provided. Please check that you have entered the file key correctly.", type) if s3_key.nil?

  s3_params = {:s3_access_key => s3_access, :s3_secret => s3_secret,
               :s3_bucket => s3_bucket, :s3_file_key => s3_key}
  s3_params[:cycle_id] = options[:cycle_id] if options[:cycle_id]

  res = api.data.pull_to_s3(id, s3_params)

  if type == "json"
    display "{}"
  else
    display "downloading dataset to s3://#{res["s3_bucket"]}/#{res["s3_file_key"]}/"
    display "if the dataset is large, this may take a while, please check your s3 account after a few minutes"
  end
end

#readme ⇒ `Object`

data:readme ID FILE

Attaches a README file to a dataset

#HIDDEN

# File 'lib/zillabyte/cli/data.rb', line 217

def readme
  # data:readme ID FILE
  #
  # Reads FILE and POSTs its name and content to /relations/<id>/readme to
  # attach it as the dataset's README.
  id = options[:id] || shift_argument
  file = options[:file] || shift_argument
  # `type` was previously referenced without being assigned, so a missing
  # id/file raised NameError instead of reporting the problem.
  type = options[:output_type]

  error("no id given", type) if id.nil?
  error("no file given", type) if file.nil?
  # File.exists? was deprecated and then removed in Ruby 3.2; File.exist? is
  # the supported spelling.
  error("file doesn't exist", type) unless File.exist?(file)
  content = File.read(file)

  api.request(
    :expects  => 200,
    :method   => :post,
    :path     => "/relations/#{CGI.escape(id)}/readme",
    :body     => {:filename => file, :content => content}.to_json
  )

  display "README updated"
end

#show ⇒ `Object`

data:show ID

Shows a sample of the dataset. See ‘zillabyte queries’ for more elaborate functionality.

--cycle_id [cycle_id]      # Retrieve data generated during specified cycle if dataset is associated with an app [default: last cycle]
--no_truncation            # Don't truncate long strings
--meta                     # Show metadata columns (since, confidence, source)
--output_type OUTPUT_TYPE  # Specify an output type i.e. json #HIDDEN

# File 'lib/zillabyte/cli/data.rb', line 404

def show
  # data:show ID
  #
  # Requests a sample of a dataset, polls the returned job until it completes
  # (for at most MAX_POLL_SECONDS, sleeping POLL_SLEEP between polls) and
  # prints the sampled rows as a table.
  #
  # Raises RuntimeError when a poll reports an unknown status or a completed
  # job carries no 'return' payload.
  name = options[:name] || shift_argument
  type = options[:output_type]
  show_meta = options[:meta] || false
  error("no id given", type) if name.nil?

  # Initial request..
  res = api.data.show(name, :post, options)

  if res['job_id']
    # Subsequent polls identify the job through the options hash.
    options[:job_id] = res['job_id']

    start = Time.now.utc
    finished = false

    display "Fetching your data, please wait..." if type.nil?

    # Poll until the results are ready...
    while Time.now.utc < start + MAX_POLL_SECONDS
      res = api.data.show(name, :get, options)

      case res['status']
      when 'completed'
        # `raise`, not `throw`: throw is for catch/throw control flow and only
        # surfaced here as an "uncaught throw" error.
        raise "something is wrong: #{res}" unless res['return']
        res = res['return']
        finished = true
        break
      when 'running'
        sleep(POLL_SLEEP)
      else
        raise "unknown status: #{res}"
      end
    end

    # Previously a timeout fell through with the last status response, which
    # has no rows and was therefore reported as "empty dataset".
    error("timed out after #{MAX_POLL_SECONDS} seconds waiting for the dataset sample", type) unless finished
  else
    if res['error']
      error(res['error_message'] || res['error'], type)
    else
      error("remote server error (r256)", type)
    end
  end

  # We only reach here after polling is complete...
  if res["rows"] && res["rows"].size > 0
    # Column keys come from the first row; "id" is always hidden and the
    # META_COLUMNS are hidden unless --meta was given.
    concrete_headings = res["rows"].first.keys
    concrete_headings.delete("id")
    META_COLUMNS.each { |c| concrete_headings.delete(c) } unless show_meta

    # Display each column under its alias(es) when any are defined, otherwise
    # under its concrete name.
    # NOTE(review): a column with several aliases adds several headings while
    # each row still holds one value for it — confirm whether that can occur.
    headings = []
    concrete_headings.each do |ch|
      has_alias = false
      (res['column_aliases'] || []).each do |al|
        if al["concrete_name"] == ch
          headings << al["alias"]
          has_alias = true
        end
      end
      headings << ch unless has_alias
    end

    # Long values are shortened for terminal output only; json output and
    # --no_truncation keep them intact.
    truncate = !options[:no_truncation] && type != "json"
    rows = res["rows"].map do |obj|
      concrete_headings.map do |heading|
        value = obj[heading]
        if truncate && value.to_s.size > 30
          value.to_s[0..30] + "..."
        else
          value
        end
      end
    end

    require("colorize")
    require("zillabyte/cli/helpers/table_output_builder")
    if type.nil?
      display "Sampled output:"
      display TableOutputBuilder.build_terminal_table(headings, rows)
      display "To download your full dataset, type "+"`zillabyte data:pull [RELATION_NAME] [OUTPUT_PREFIX] [DIRECTORY]`".colorize(:green)+"."
    else
      display TableOutputBuilder.build_table(headings, rows, type)
    end
  else
    if type == "json"
      display "{}"
    else
      display "empty dataset"
    end
  end
end

Class: Zillabyte::Command::Data

Overview

Constant Summary collapse

Constants inherited from Base

Instance Attribute Summary

Attributes inherited from Base

Instance Method Summary collapse

Methods inherited from Base

Methods included from Helpers

Constructor Details

Instance Method Details

#append ⇒ Object

#authorize ⇒ Object

#create ⇒ Object

#delete ⇒ Object

#index ⇒ Object

#list ⇒ Object

#pull ⇒ Object

#pull_to_s3 ⇒ Object

#readme ⇒ Object

#show ⇒ Object