Class: OdpsDatahub::OdpsTable

Inherits:
Object
  • Object
show all
Defined in:
lib/fluent/plugin/odps/odps_table.rb

Instance Method Summary collapse

Constructor Details

#initialize(odpsConfig, projectName, tableName) ⇒ OdpsTable

Returns a new instance of OdpsTable.



144
145
146
147
148
# File 'lib/fluent/plugin/odps/odps_table.rb', line 144

# Stores the connection configuration and the project/table names that the
# other methods of this object read when building request URLs and SQL.
def initialize(odpsConfig, projectName, tableName)
  @mOdpsConfig, @mProjectName = odpsConfig, projectName
  @mTableName = tableName
end

Instance Method Details

#addPartition(ptStr) ⇒ Object

ptStr example: ‘dt=20150805,hh=08,mm=24’. Adds the partition if it does not already exist.



197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# File 'lib/fluent/plugin/odps/odps_table.rb', line 197

# Adds a partition to this table unless it already exists.
#
# ptStr is a comma-separated list of key=value pairs, for example
# 'dt=20150805,hh=08,mm=24'. The pairs are turned into an
# "ALTER TABLE <project>.<table> ADD IF NOT EXISTS PARTITION (...)" statement
# that is passed to runSQL under the task name "SQLAddPartitionTask".
#
# Returns whatever runSQL returns.
# Raises RuntimeError when ptStr holds no pairs, when a pair is not exactly
# key=value with a non-empty key, or when a pair contains a single quote.
def addPartition(ptStr)
  pts_array = ptStr.split(',')
  # Without at least one pair the statement would end in "PARTITION );".
  raise "invalid partition spec: " + ptStr.inspect if pts_array.empty?

  columns = pts_array.map do |pt|
    ptkv = pt.split('=')
    raise "invalid partition spec: " + pt if ptkv.size != 2 || ptkv[0].empty?
    # Values are wrapped in single quotes below; an embedded quote would end
    # the literal early and change the statement that gets executed.
    raise "invalid partition spec: " + pt if pt.include?("'")
    ptkv[0] + "='" + ptkv[1] + "'"
  end

  sqlstr = "ALTER TABLE " + @mProjectName + "." + @mTableName +
           " ADD IF NOT EXISTS PARTITION (" + columns.join(',') + ");"
  runSQL("SQLAddPartitionTask", sqlstr)
end

#genJobXml(name, priority, comment, taskStr, runMode = 'sequence') ⇒ Object

TODO: support multiple tasks



240
241
242
243
# File 'lib/fluent/plugin/odps/odps_table.rb', line 240

# Builds the job XML for a single task by delegating to
# XmlTemplate.getJobXml with the same arguments; runMode defaults to
# 'sequence'. Returns the value produced by XmlTemplate.getJobXml.
# TODO: support multiple tasks
def genJobXml(name, priority, comment, taskStr, runMode='sequence')
  XmlTemplate.getJobXml(name, priority, comment, taskStr, runMode)
end

#getPartitionList ⇒ Object

Gets the table's partitions and returns them as an array of hashes, e.g. [{"place"=>"china2"}, {"place"=>"china"}]



151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# File 'lib/fluent/plugin/odps/odps_table.rb', line 151

# Lists the partitions of this table by issuing GET requests against
# /projects/<project>/tables/<table>, following the marker returned by each
# response until no further marker is given.
#
# Returns an Array of Hashes, one per <Partition> element; each Hash maps the
# "Name" attribute of every child element to its "Value" attribute.
#
# A response whose code is not "200" ends the listing without raising, and
# whatever was collected so far (possibly nothing) is returned.
def getPartitionList
  partitionList = []
  url = "/projects/" + @mProjectName + "/tables/" + @mTableName
  lastMarker = nil

  loop do
    param = {}
    param[$PARAM_CURR_PROJECT] = @mProjectName
    param[$PARAM_EXPECT_MARKER] = true
    param[$PARAM_PARTITIONS] = ""
    param[$PARAM_MARKER] = lastMarker unless lastMarker.nil?

    conn = HttpConnection.new(@mOdpsConfig, {}, param, url, "GET", "", true)
    res = conn.getResponse
    # Deliberately best-effort: a failed request is not reported as an error.
    return partitionList if res.code != "200"

    root = REXML::Document.new(res.body.to_s).root

    # Parse the partitions of this page.
    root.get_elements("Partition").each do |partition|
      partitionInfo = {}
      partition.elements.each do |column|
        partitionInfo[column.attributes["Name"]] = column.attributes["Value"]
      end
      partitionList.push(partitionInfo)
    end

    # Read the marker for the next page. A missing <Marker> element is treated
    # like an empty one: there is nothing further to fetch.
    marker = root.get_elements("Marker").first
    lastMarker = marker && marker.text
    break if lastMarker.nil? || lastMarker.empty?
  end

  partitionList
end

#getTaskResult(instanceurl, name) ⇒ Object



275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# File 'lib/fluent/plugin/odps/odps_table.rb', line 275

# Fetches the result document of an instance (GET on instanceurl with the
# 'result' parameter) and returns the result of the task called `name`.
#
# Returns the string form of the CDATA sections found inside the matching
# task's <Result> element, or nil when no <Task> under <Tasks> carries that
# name.
def getTaskResult(instanceurl, name)
  params = { 'result' => "" }
  conn = HttpConnection.new(@mOdpsConfig, {}, params, instanceurl, 'GET', "", true)
  doc = REXML::Document.new(conn.getResponse.body.to_s)

  wanted = name.to_s
  task = doc.root.get_elements('Tasks/Task').find do |e|
    e.elements['Name'].text == wanted
  end
  # Say "not found" explicitly instead of leaking the iteration's own return
  # value to the caller.
  return nil if task.nil?

  task.elements['Result'].cdatas.to_s
end

#runSQL(taskName, sqlstring) ⇒ Object



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# File 'lib/fluent/plugin/odps/odps_table.rb', line 213

# Submits `sqlstring` as a job with a single task named `taskName` and waits
# for it to finish.
#
# The task XML comes from XmlTemplate.getTaskXml and is wrapped by genJobXml;
# the job is POSTed to /projects/<project>/instances and the response is
# handed to waitForSQLComplete, whose return value is returned.
#
# Raises RuntimeError when the response code is not '200', or when the
# response has a non-empty body that does not mention "Instance".
def runSQL(taskName, sqlstring)
  task_xml = XmlTemplate.getTaskXml(taskName, sqlstring)
  job_xml = genJobXml('arbitriary_job', '9', "", task_xml)

  headers = {
    'Content-Type' => 'application/xml',
    'Content-MD5' => Digest::MD5.hexdigest(job_xml),
    # Content-Length counts bytes, not characters; they differ as soon as the
    # SQL contains multibyte text.
    'Content-Length' => job_xml.bytesize.to_s
  }

  url = "/projects/" + @mProjectName + "/instances"
  conn = HttpConnection.new(@mOdpsConfig, headers, {}, url, 'POST', job_xml, true)
  res = conn.getResponse

  # The body may be nil; normalise it so it can be concatenated and raised.
  body = res.body.to_s
  raise "Add partition failed with error " + body if res.code != '200'

  # waitForSQLComplete reads res.to_hash with a lowercase key and an array
  # value ('location'); accept that shape here too, alongside the original
  # 'Content-Length' spelling.
  # NOTE(review): presumably res is a Net::HTTPResponse - confirm.
  response_headers = res.to_hash
  content_length = Array(response_headers['content-length'] || response_headers['Content-Length']).first
  raise body if content_length != "0" && !body.include?("Instance")

  waitForSQLComplete(res)
end

#waitForSQLComplete(res) ⇒ Object



245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
# File 'lib/fluent/plugin/odps/odps_table.rb', line 245

# Polls the instance created by a job submission until its status is
# 'Terminated', then checks every task of the instance.
#
# `res` is the submission response; the instance id is taken from the last
# path segment of its 'location' header. The instance is polled with the
# 'taskstatus' parameter, sleeping 5 seconds between polls.
#
# Raises RuntimeError, with getTaskResult's output as the message, for the
# first task whose status is not 'Success'.
def waitForSQLComplete(res)
  instance_id = res.to_hash['location'][0].split('/')[-1]
  instanceurl = "/projects/" + @mProjectName + "/instances" + "/" + instance_id

  headers = {}
  params = { 'taskstatus' => "" }
  doc = nil

  loop do
    conn = HttpConnection.new(@mOdpsConfig, headers, params, instanceurl, 'GET', "", true)
    doc = REXML::Document.new(conn.getResponse.body.to_s)
    break if doc.root.elements["Status"].text == 'Terminated'

    # Back off for every non-terminated status, not only 'Running' and
    # 'Suspended', so an unexpected status cannot turn into a tight loop.
    sleep(5)
  end

  doc.root.elements.each('Tasks/Task') do |e|
    status = e.elements['Status'].text
    name = e.elements['Name'].text
    raise getTaskResult(instanceurl, name.to_s) if status.to_s != 'Success'
  end
end