Class: Driftwood::Bigquery

Inherits:
Object
  • Object
show all
Defined in:
lib/driftwood/bigquery.rb

Instance Method Summary collapse

Constructor Details

#initialize(config) ⇒ Bigquery

Returns a new instance of Bigquery.



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/driftwood/bigquery.rb', line 4

def initialize(config)
  $logger.info "Starting Bigquery connection"

  config[:keyfile] = File.expand_path(config[:keyfile])

  @bigquery = Google::Cloud::Bigquery.new(
    :project_id  => config[:project],
    :credentials => Google::Cloud::Bigquery::Credentials.new(config[:keyfile]),
  )
  @dataset = @bigquery.dataset(config[:dataset])
  raise "\nThere is a problem with the gCloud configuration: \n #{JSON.pretty_generate(config)}" if @dataset.nil?

  if config[:noinit]
    $logger.warn "Skipping reinitialization as instructed"
    @teams     = @dataset.table('slack_teams')
    @active    = @dataset.table('slack_billing_active')
    @creation  = @dataset.table('slack_user_creation')
    @messages  = @dataset.table('slack_messages')
    @channels  = @dataset.table('slack_channels')
    @users     = @dataset.table('slack_users')
    return
  end

  @teams = @dataset.table('slack_teams') || @dataset.create_table('slack_teams') do |table|
                                                    table.name        = 'Slack Teams'
                                                    table.description = 'A list of all team names & ID, plus some metadata'
                                                    table.schema do |s|
                                                      s.string "team_id",           mode: :required
                                                      s.string "name",              mode: :required
                                                      s.string "user_access_token", mode: :required
                                                      s.string "bot_user_id",       mode: :required
                                                      s.string "bot_access_token",  mode: :required
                                                      s.string "application_id",    mode: :required
                                                    end
                                                  end

  @creation = @dataset.table('slack_user_creation') || @dataset.create_table('slack_user_creation') do |table|
                                                    table.name        = 'Slack User Creation'
                                                    table.description = 'This keeps track of when users are created. The Slack API does not provide this info.'
                                                    table.schema do |s|
                                                      s.string    "team_id",   mode: :required
                                                      s.string    "user_id",   mode: :required
                                                      s.timestamp "creation",  mode: :required
                                                    end
                                                  end

  @messages = @dataset.table('slack_messages') || @dataset.create_table('slack_messages') do |table|
                                                    table.name        = 'Slack Message Log'
                                                    table.description = 'All messages are logged here permanently.'
                                                    table.schema do |s|
                                                      s.string "team_id",    mode: :required
                                                      s.string "channel_id", mode: :required
                                                      s.string "user_id",    mode: :required
                                                      s.string "ts",         mode: :required
                                                      s.string "text",       mode: :required
                                                    end
                                                  end

  # these we delete and rebuild because it's about 17 million times faster. Only slightly exaggerated.
  begin
    @dataset.table('slack_channels').delete
    @channels = @dataset.create_table('slack_channels') do |table|
      table.name        = 'Slack Channels'
      table.description = 'A list of all channels and metadata'
      table.schema do |s|
        s.string  "team_id",     mode: :required
        s.string  "channel_id",  mode: :required
        s.string  "name",        mode: :required
        s.integer "created",     mode: :required
        s.boolean "is_private",  mode: :required
        s.string  "topic",       mode: :required
        s.string  "purpose",     mode: :required
        s.integer "num_members", mode: :required
      end
    end
  rescue => e
    $logger.error e.message
    $logger.debug e.backtrace.join("\n")
    @channels = @dataset.table('slack_channels')
  end

  begin
    @dataset.table('slack_users').delete
    @users = @dataset.create_table('slack_users') do |table|
      table.name        = 'Slack Users'
      table.description = 'A list of all users and metadata. Ironically, we only store the email so we can delete if a GDPR request is made. We cannot do anything else with it.'
      table.schema do |s|
        s.string  "team_id",       mode: :required
        s.string  "user_id",       mode: :required
        s.string  "name",          mode: :required
        s.string  "real_name",     mode: :required
        s.string  "display_name",  mode: :required
        s.boolean "is_owner",      mode: :required
        s.boolean "is_admin",      mode: :required
        s.string  "title"
        s.string  "phone"
        s.string  "skype"
        s.string  "email"
        s.string  "tz"
        s.string  "tz_offset"
        s.string  "status_text",   mode: :required
        s.string  "status_emoji",  mode: :required
        s.string  "image_72",      mode: :required
        s.string  "image_192",     mode: :required
        s.integer "updated",       mode: :required
        s.boolean "deleted",       mode: :required
      end
    end
  rescue => e
    $logger.error e.message
    $logger.debug e.backtrace.join("\n")
    @users = @dataset.table('slack_users')
  end
  $logger.info "Bigquery initialization complete"
end

Instance Method Details

#deduplicate_table(table) ⇒ Object



181
182
183
# File 'lib/driftwood/bigquery.rb', line 181

def deduplicate_table(table)

end

#get_auth_tokens(application_id) ⇒ Object



120
121
122
# File 'lib/driftwood/bigquery.rb', line 120

def get_auth_tokens(application_id)
  @dataset.query("SELECT * FROM slack_teams WHERE application_id = '#{application_id}'").to_a rescue []
end

#insert_channel(record) ⇒ Object



133
134
135
# File 'lib/driftwood/bigquery.rb', line 133

def insert_channel(record)
  @channels.insert(record).success?
end

#insert_message(record) ⇒ Object



146
147
148
# File 'lib/driftwood/bigquery.rb', line 146

def insert_message(record)
  @messages.insert(record).success?
end

#insert_team(auth_token, application_id) ⇒ Object



124
125
126
127
128
129
130
131
# File 'lib/driftwood/bigquery.rb', line 124

def insert_team(auth_token, application_id)
  auth_token[:application_id] = application_id

  # We can take the 2.5s delete-before-insert here because it only happens when
  # authorizing a new team integration.
  @dataset.query("DELETE FROM slack_teams WHERE team_id = '#{auth_token[:team_id]}' AND application_id = '#{application_id}'")
  @teams.insert(auth_token).success?
end

#insert_user(record) ⇒ Object



137
138
139
140
141
142
143
144
# File 'lib/driftwood/bigquery.rb', line 137

def insert_user(record)
  if @users.insert(record).success?
    record_user_creation(record['team_id'], record['user_id'])
  else
    false
  end

end

#newest_message_timestampObject



150
151
152
# File 'lib/driftwood/bigquery.rb', line 150

def newest_message_timestamp
  @dataset.query("SELECT ts FROM slack_messages ORDER BY ts DESC LIMIT 1").first[:ts] rescue 0
end

#purge_deleted_usersObject



160
161
162
# File 'lib/driftwood/bigquery.rb', line 160

def purge_deleted_users
  @dataset.query('DELETE FROM slack_users WHERE deleted = true')
end

#reconcile_user_creationsObject

Ensure that all users have creation dates. If they don’t have one yet, assume now This will catch any users who’ve been created during downtime if we restart the service



166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/driftwood/bigquery.rb', line 166

def reconcile_user_creations()
  @dataset.query("INSERT INTO
    slack_user_creation (team_id, user_id, creation)
    (
      SELECT team_id, user_id, CURRENT_TIMESTAMP()
      FROM
        slack_users
      WHERE
        user_id NOT IN
        (
          SELECT user_id from slack_user_creation
        )
    )")
end

#record_user_creation(team_id, user_id) ⇒ Object



154
155
156
157
158
# File 'lib/driftwood/bigquery.rb', line 154

def record_user_creation(team_id, user_id)
  @creation.insert(:team_id  => team_id,
                   :user_id  => user_id,
                   :creation => Time.now.strftime('%Y-%m-%d %H:%M:%S%z')).success?
end

#shellObject



185
186
187
188
# File 'lib/driftwood/bigquery.rb', line 185

def shell
  require 'pry'
  binding.pry
end