Class: Gcloud::Bigquery::Table

Inherits: Object
Defined in:
lib/gcloud/bigquery/table.rb,
lib/gcloud/bigquery/table/list.rb,
lib/gcloud/bigquery/table/schema.rb

Overview

Table

A named resource representing a BigQuery table that holds zero or more records. Every table is defined by a schema that may contain nested and repeated fields.

Examples:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"

table = dataset.create_table "my_table" do |schema|
  schema.string "first_name", mode: :required
  schema.record "cities_lived", mode: :repeated do |nested_schema|
    nested_schema.string "place", mode: :required
    nested_schema.integer "number_of_years", mode: :required
  end
end

row = {
  "first_name" => "Alice",
  "cities_lived" => [
    {
      "place" => "Seattle",
      "number_of_years" => 5
    },
    {
      "place" => "Stockholm",
      "number_of_years" => 6
    }
  ]
}
table.insert row

Defined Under Namespace

Classes: List, Schema


Instance Method Details

#api_url ⇒ Object

A URL that can be used to access the table using the REST API.


# File 'lib/gcloud/bigquery/table.rb', line 189

def api_url
  ensure_full_data!
  @gapi["selfLink"]
end

#bytes_count ⇒ Object

The number of bytes in the table.


# File 'lib/gcloud/bigquery/table.rb', line 218

def bytes_count
  ensure_full_data!
  @gapi["numBytes"]
end

#copy(destination_table, create: nil, write: nil, dryrun: nil) ⇒ Gcloud::Bigquery::CopyJob

Copies the data from the table to another table. The destination table argument can also be a string identifier as specified by the Query Reference: project_name:datasetId.tableId. This is useful for referencing tables in other projects and datasets.

Examples:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"
destination_table = dataset.table "my_destination_table"

copy_job = table.copy destination_table

Passing a string identifier for the destination table:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

copy_job = table.copy "other-project:other_dataset.other_table"

# File 'lib/gcloud/bigquery/table.rb', line 488

def copy destination_table, create: nil, write: nil, dryrun: nil
  ensure_connection!
  options = { create: create, write: write, dryrun: dryrun }
  resp = connection.copy_table table_ref,
                               get_table_ref(destination_table),
                               options
  if resp.success?
    Job.from_gapi resp.data, connection
  else
    fail ApiError.from_response(resp)
  end
end

#created_at ⇒ Object

The time when this table was created.


# File 'lib/gcloud/bigquery/table.rb', line 238

def created_at
  ensure_full_data!
  Time.at(@gapi["creationTime"] / 1000.0)
end

#data(token: nil, max: nil, start: nil) ⇒ Gcloud::Bigquery::Data

Retrieves data from the table.

Examples:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

data = table.data
data.each do |row|
  puts row["first_name"]
end
more_data = table.data token: data.token
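
The token, max, and start options control paging; max and start are assumed here to correspond to the API's maxResults and startIndex parameters. A minimal paging sketch:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

# Fetch up to 100 rows, beginning at (zero-based) row 200.
data = table.data max: 100, start: 200
data.each do |row|
  puts row["first_name"]
end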

# File 'lib/gcloud/bigquery/table.rb', line 426

def data token: nil, max: nil, start: nil
  ensure_connection!
  options = { token: token, max: max, start: start }
  resp = connection.list_tabledata dataset_id, table_id, options
  if resp.success?
    Data.from_response resp, self
  else
    fail ApiError.from_response(resp)
  end
end

#dataset_id ⇒ Object

The ID of the Dataset containing this table.


# File 'lib/gcloud/bigquery/table.rb', line 98

def dataset_id
  @gapi["tableReference"]["datasetId"]
end

#delete ⇒ Boolean

Permanently deletes the table.

Examples:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

table.delete

# File 'lib/gcloud/bigquery/table.rb', line 819

def delete
  ensure_connection!
  resp = connection.delete_table dataset_id, table_id
  if resp.success?
    true
  else
    fail ApiError.from_response(resp)
  end
end

#description ⇒ Object

The description of the table.


# File 'lib/gcloud/bigquery/table.rb', line 199

def description
  ensure_full_data!
  @gapi["description"]
end

#description=(new_description) ⇒ Object

Updates the description of the table.
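
A minimal sketch; the assignment patches the table resource on the service:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

table.description = "Registered users and the cities they have lived in"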


# File 'lib/gcloud/bigquery/table.rb', line 209

def description= new_description
  patch_gapi! description: new_description
end

#etag ⇒ Object

A string hash of the table.


# File 'lib/gcloud/bigquery/table.rb', line 179

def etag
  ensure_full_data!
  @gapi["etag"]
end

#expires_at ⇒ Object

The time when this table expires. If not present, the table will persist indefinitely. Expired tables will be deleted and their storage reclaimed.
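
Because the expiration time may be absent, callers should be prepared for a nil return. For example:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

if table.expires_at.nil?
  puts "#{table.table_id} persists indefinitely"
else
  puts "#{table.table_id} expires at #{table.expires_at}"
end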


# File 'lib/gcloud/bigquery/table.rb', line 250

def expires_at
  ensure_full_data!
  return nil if @gapi["expirationTime"].nil?
  Time.at(@gapi["expirationTime"] / 1000.0)
end

#extract(extract_url, format: nil, compression: nil, delimiter: nil, header: nil, dryrun: nil) ⇒ Gcloud::Bigquery::ExtractJob

Extract the data from the table to a Google Cloud Storage file.

Examples:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

extract_job = table.extract "gs://my-bucket/file-name.json",
                            format: "json"

# File 'lib/gcloud/bigquery/table.rb', line 577

def extract extract_url, format: nil, compression: nil, delimiter: nil,
            header: nil, dryrun: nil
  ensure_connection!
  options = { format: format, compression: compression,
              delimiter: delimiter, header: header, dryrun: dryrun }
  resp = connection.extract_table table_ref, extract_url, options
  if resp.success?
    Job.from_gapi resp.data, connection
  else
    fail ApiError.from_response(resp)
  end
end

#fields ⇒ Object

The fields of the table.


# File 'lib/gcloud/bigquery/table.rb', line 383

def fields
  f = schema["fields"]
  f = f.to_hash if f.respond_to? :to_hash
  f = [] if f.nil?
  f
end

#headers ⇒ Object

The names of the columns in the table.
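
For example, given the schema from the Overview (the return value shown is illustrative):

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

table.headers #=> ["first_name", "cities_lived"]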


# File 'lib/gcloud/bigquery/table.rb', line 395

def headers
  fields.map { |f| f["name"] }
end

#id ⇒ Object

The combined Project ID, Dataset ID, and Table ID for this table, in the format specified by the Query Reference: project_name:datasetId.tableId. To use this value in queries see #query_id.


# File 'lib/gcloud/bigquery/table.rb', line 130

def id
  @gapi["id"]
end

#insert(rows, skip_invalid: nil, ignore_unknown: nil) ⇒ Gcloud::Bigquery::InsertResponse

Inserts data into the table for near-immediate querying, without the need to complete a #load operation before the data can appear in query results.

Examples:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

rows = [
  { "first_name" => "Alice", "age" => 21 },
  { "first_name" => "Bob", "age" => 22 }
]
table.insert rows
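
A sketch using the skip_invalid and ignore_unknown options; the success? reader on the returned InsertResponse is an assumption about that class:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

rows = [
  { "first_name" => "Alice", "age" => 21 },
  { "first_name" => "Bob", "age" => 22 }
]

# Skip rows that fail validation and ignore unknown values,
# rather than rejecting the entire request.
resp = table.insert rows, skip_invalid: true, ignore_unknown: true
puts "some rows were not inserted" unless resp.success?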

# File 'lib/gcloud/bigquery/table.rb', line 790

def insert rows, skip_invalid: nil, ignore_unknown: nil
  rows = [rows] if rows.is_a? Hash
  ensure_connection!
  options = { skip_invalid: skip_invalid, ignore_unknown: ignore_unknown }
  resp = connection.insert_tabledata dataset_id, table_id, rows, options
  if resp.success?
    InsertResponse.from_gapi rows, resp.data
  else
    fail ApiError.from_response(resp)
  end
end

#load(file, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil, quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil, skip_leading: nil, dryrun: nil) ⇒ Gcloud::Bigquery::LoadJob

Loads data into the table. You can pass a Google Cloud Storage file path (a gs:// URL) or a Gcloud::Storage::File instance, or you can upload a local file directly. See Loading Data with a POST Request.

A note about large direct uploads

You may encounter a Broken pipe (Errno::EPIPE) error when attempting to upload large files. To avoid this problem, add the httpclient gem to your project, and the line (or lines) of configuration shown below. These lines must execute after you require gcloud but before you make your first gcloud connection. The first statement configures Faraday to use httpclient. The second statement, which should only be added if you are using a version of Faraday at or above 0.9.2, is a workaround for this gzip issue.

require "gcloud"

# Use httpclient to avoid broken pipe errors with large uploads Faraday.default_adapter = :httpclient

# Only add the following statement if using Faraday >= 0.9.2 # Override gzip middleware with no-op for httpclient Faraday::Response.register_middleware :gzip => Faraday::Response::Middleware

gcloud = Gcloud.new bigquery = gcloud.bigquery

Examples:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

load_job = table.load "gs://my-bucket/file-name.csv"

Pass a gcloud storage file instance:


require "gcloud"
require "gcloud/storage"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

storage = gcloud.storage
bucket = storage.bucket "my-bucket"
file = bucket.file "file-name.csv"
load_job = table.load file

Upload a file directly:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

file = File.open "my_data.csv"
load_job = table.load file
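
A hedged sketch combining several of the options; the symbol values for create and write are assumed to map to the BigQuery create and write dispositions:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

# Append CSV data, skipping its header row, and create the
# table first if it does not already exist.
load_job = table.load "gs://my-bucket/file-name.csv",
                      format: "csv",
                      skip_leading: 1,
                      create: :needed,
                      write: :append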

# File 'lib/gcloud/bigquery/table.rb', line 738

def load file, format: nil, create: nil, write: nil,
         projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
         encoding: nil, delimiter: nil, ignore_unknown: nil,
         max_bad_records: nil, quote: nil, skip_leading: nil, dryrun: nil
  ensure_connection!
  options = { format: format, create: create, write: write,
              projection_fields: projection_fields,
              jagged_rows: jagged_rows, quoted_newlines: quoted_newlines,
              encoding: encoding, delimiter: delimiter,
              ignore_unknown: ignore_unknown,
              max_bad_records: max_bad_records, quote: quote,
              skip_leading: skip_leading, dryrun: dryrun }
  return load_storage(file, options) if storage_url? file
  return load_local(file, options) if local_file? file
  fail Gcloud::Bigquery::Error, "Don't know how to load #{file}"
end

#location ⇒ Object

The geographic location where the table should reside. Possible values include EU and US. The default value is US.


# File 'lib/gcloud/bigquery/table.rb', line 290

def location
  ensure_full_data!
  @gapi["location"]
end

#modified_at ⇒ Object

The time when this table was last modified.


# File 'lib/gcloud/bigquery/table.rb', line 261

def modified_at
  ensure_full_data!
  Time.at(@gapi["lastModifiedTime"] / 1000.0)
end

#name ⇒ Object

The name of the table.


# File 'lib/gcloud/bigquery/table.rb', line 161

def name
  @gapi["friendlyName"]
end

#name=(new_name) ⇒ Object

Updates the name of the table.
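
A minimal sketch; like #description=, the assignment patches the table resource:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

table.name = "My Table"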


# File 'lib/gcloud/bigquery/table.rb', line 170

def name= new_name
  patch_gapi! name: new_name
end

#project_id ⇒ Object

The ID of the Project containing this table.


# File 'lib/gcloud/bigquery/table.rb', line 107

def project_id
  @gapi["tableReference"]["projectId"]
end

#query_id ⇒ Object

The value returned by #id, wrapped in square brackets if the Project ID contains dashes, as specified by the Query Reference. Useful in queries.

Examples:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

data = bigquery.query "SELECT name FROM #{table.query_id}"

# File 'lib/gcloud/bigquery/table.rb', line 152

def query_id
  project_id["-"] ? "[#{id}]" : id
end

#reload! ⇒ Object Also known as: refresh!

Reloads the table with current data from the BigQuery service.
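
A minimal sketch; the call discards locally cached state and fetches the current resource representation:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

table.reload!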


# File 'lib/gcloud/bigquery/table.rb', line 834

def reload!
  ensure_connection!
  resp = connection.get_table dataset_id, table_id
  if resp.success?
    @gapi = resp.data
  else
    fail ApiError.from_response(resp)
  end
end

#rows_count ⇒ Object

The number of rows in the table.


# File 'lib/gcloud/bigquery/table.rb', line 228

def rows_count
  ensure_full_data!
  @gapi["numRows"]
end

#schema(replace: false) {|schema_builder| ... } ⇒ Object

Returns the table's schema as a hash containing the keys and values returned by the Google Cloud BigQuery REST API. This method can also be used to set, replace, or add to the schema by passing a block. See Schema for available methods. To set the schema by passing a hash instead, use #schema=.

Examples:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.create_table "my_table"

table.schema do |schema|
  schema.string "first_name", mode: :required
  schema.record "cities_lived", mode: :repeated do |nested_schema|
    nested_schema.string "place", mode: :required
    nested_schema.integer "number_of_years", mode: :required
  end
end
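
Continuing the example above, passing replace: true discards the existing schema before the block runs, rather than adding to it. A minimal sketch:

table.schema replace: true do |schema|
  schema.string "first_name", mode: :required
  schema.integer "age", mode: :required
end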

Yields:

  • (schema_builder)

# File 'lib/gcloud/bigquery/table.rb', line 327

def schema replace: false
  ensure_full_data!
  g = @gapi
  g = g.to_hash if g.respond_to? :to_hash
  s = g["schema"] ||= {}
  return s unless block_given?
  s = nil if replace
  schema_builder = Schema.new s
  yield schema_builder
  self.schema = schema_builder.schema if schema_builder.changed?
end

#schema=(new_schema) ⇒ Object

Updates the schema of the table. To update the schema using a block instead, use #schema.

Examples:

require "gcloud"

gcloud = Gcloud.new
bigquery = gcloud.bigquery
dataset = bigquery.dataset "my_dataset"
table = dataset.create_table "my_table"

schema = {
  "fields" => [
    {
      "name" => "first_name",
      "type" => "STRING",
      "mode" => "REQUIRED"
    },
    {
      "name" => "age",
      "type" => "INTEGER",
      "mode" => "REQUIRED"
    }
  ]
}
table.schema = schema

# File 'lib/gcloud/bigquery/table.rb', line 374

def schema= new_schema
  patch_gapi! schema: new_schema
end

#table? ⇒ Boolean

Checks if the table's type is "TABLE".


# File 'lib/gcloud/bigquery/table.rb', line 271

def table?
  @gapi["type"] == "TABLE"
end

#table_id ⇒ Object

A unique ID for this table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.


# File 'lib/gcloud/bigquery/table.rb', line 89

def table_id
  @gapi["tableReference"]["tableId"]
end

#view? ⇒ Boolean

Checks if the table's type is "VIEW".


# File 'lib/gcloud/bigquery/table.rb', line 280

def view?
  @gapi["type"] == "VIEW"
end