Class: OdpsDatahub::OdpsTable

Inherits:
Object
  • Object
show all
Defined in:
lib/fluent/plugin/odps/odps_table.rb

Instance Method Summary collapse

Constructor Details

#initialize(odpsConfig, projectName, tableName) ⇒ OdpsTable

Returns a new instance of OdpsTable.



105
106
107
108
109
# File 'lib/fluent/plugin/odps/odps_table.rb', line 105

# Remembers the configuration and the project/table names that the other
# methods of this object use when building request URLs and SQL.
# Nothing is validated and no request is sent here.
#
# odpsConfig  - configuration object passed on to HttpConnection
# projectName - name of the project the table belongs to
# tableName   - name of the table
def initialize(odpsConfig, projectName, tableName)
  @mOdpsConfig, @mProjectName, @mTableName = odpsConfig, projectName, tableName
end

Instance Method Details

#addPartition(ptStr) ⇒ Object

ptStr example: ‘dt=20150805,hh=08,mm=24’. Adds the partition if it does not already exist.



158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/fluent/plugin/odps/odps_table.rb', line 158

# Adds a partition to the table if it does not exist yet, by running an
# "ALTER TABLE <project>.<table> ADD IF NOT EXISTS PARTITION (...)" statement
# through runSQL under the task name "SQLAddPartitionTask".
#
# ptStr - comma-separated key=value pairs, e.g. 'dt=20150805,hh=08,mm=24'.
#
# Raises RuntimeError when the spec is empty, when a pair is not exactly
# key=value, when the key is empty, or when a pair contains a single quote
# (values are wrapped in single quotes, so a quote would break the statement).
# Returns whatever runSQL returns.
def addPartition(ptStr)
  pairs = ptStr.to_s.split(',')
  # An empty spec used to produce "... PARTITION );" which is not valid SQL.
  raise "invalid partition spec: " + ptStr.to_s if pairs.empty?

  clauses = pairs.map do |pt|
    ptkv = pt.split('=')
    if ptkv.size != 2 || ptkv[0].empty? || pt.include?("'")
      raise "invalid partition spec: " + pt
    end
    ptkv[0] + "='" + ptkv[1] + "'"
  end

  sqlstr = "ALTER TABLE " + @mProjectName + "." + @mTableName +
           " ADD IF NOT EXISTS PARTITION (" + clauses.join(',') + ");"
  runSQL("SQLAddPartitionTask", sqlstr)
end

#genJobXml(name, priority, comment, taskStr, runMode = 'sequence') ⇒ Object

TODO: support multiple tasks



201
202
203
204
# File 'lib/fluent/plugin/odps/odps_table.rb', line 201

# Produces the job XML for a single task by handing all arguments straight
# to XmlTemplate.getJobXml and returning its result unchanged.
# TODO: support multiple tasks
#
# name     - job name
# priority - job priority
# comment  - job comment
# taskStr  - task XML to embed in the job
# runMode  - run mode forwarded to the template (defaults to 'sequence')
def genJobXml(name, priority, comment, taskStr, runMode='sequence')
  XmlTemplate.getJobXml(name, priority, comment, taskStr, runMode)
end

#getPartitionList ⇒ Object

Gets the table's partitions and returns an array of hashes such as: [{“place”=>“china2”}, {“place”=>“china”}]



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/fluent/plugin/odps/odps_table.rb', line 112

# Fetches the partitions of the table page by page and returns them as an
# array of hashes, one hash per <Partition> element, mapping each child
# element's "Name" attribute to its "Value" attribute.
#
# Each request is a GET on /projects/<project>/tables/<table>. When a page
# carries a non-empty <Marker>, its text is sent back as $PARAM_MARKER to
# request the next page; a missing or empty marker ends the loop.
#
# A response whose code is not "200" stops the listing and returns what was
# collected so far (an empty array on the first page) instead of raising.
def getPartitionList
  partitionList = []
  url = "/projects/" + @mProjectName + "/tables/" + @mTableName
  lastMarker = nil

  loop do
    param = {}
    param[$PARAM_CURR_PROJECT] = @mProjectName
    param[$PARAM_EXPECT_MARKER] = true
    param[$PARAM_PARTITIONS] = ""
    param[$PARAM_MARKER] = lastMarker unless lastMarker.nil?

    conn = HttpConnection.new(@mOdpsConfig, {}, param, url, "GET", "", true)
    res = conn.getResponse
    # Deliberately best-effort: any non-200 answer ends the listing quietly.
    return partitionList if res.code != "200"

    doc = REXML::Document.new(res.body.to_s)

    # Parse the partitions on this page.
    doc.root.get_elements("Partition").each do |partition|
      partitionInfo = {}
      partition.elements.each do |column|
        partitionInfo[column.attributes["Name"]] = column.attributes["Value"]
      end
      partitionList.push(partitionInfo)
    end

    # Read the paging marker; the element may be absent on the last page.
    markerNode = doc.root.get_elements("Marker")[0]
    lastMarker = markerNode && markerNode.text
    break if lastMarker.nil? || lastMarker.empty?
  end

  partitionList
end

#getTaskResult(instanceurl, name) ⇒ Object



236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# File 'lib/fluent/plugin/odps/odps_table.rb', line 236

# Requests the "result" view of an instance and returns the result of the
# task called +name+.
#
# instanceurl - URL path of the instance to query
# name        - task name; compared as a string against each Tasks/Task/Name
#
# Returns the string form (Array#to_s) of the CDATA sections found in the
# matching task's <Result> element, or nil when no task has that name.
def getTaskResult(instanceurl, name)
  headers = {}
  params = { 'result' => "" }

  conn = HttpConnection.new(@mOdpsConfig, headers, params, instanceurl, 'GET', "", true)
  res = conn.getResponse
  doc = REXML::Document.new(res.body.to_s)

  wanted = name.to_s
  doc.root.elements.each('Tasks/Task') { |e|
    if e.elements['Name'].text == wanted
      return e.elements['Result'].cdatas().to_s
    end
  }

  # No matching task: return nil rather than leaking the iterator's value.
  nil
end

#runSQL(taskName, sqlstring) ⇒ Object



174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/fluent/plugin/odps/odps_table.rb', line 174

# Submits one SQL statement as a job and waits for it to finish.
#
# The statement is wrapped into task XML (XmlTemplate.getTaskXml) and job XML
# (genJobXml), POSTed to /projects/<project>/instances, and the response is
# handed to waitForSQLComplete.
#
# taskName  - name given to the SQL task
# sqlstring - SQL text to run
#
# Raises RuntimeError when the response code is not '200', or when the
# response has a non-empty body that does not mention "Instance" (the body is
# then used as the error message). Errors from waitForSQLComplete propagate.
# Returns whatever waitForSQLComplete returns.
def runSQL(taskName, sqlstring)
  task_xml = XmlTemplate.getTaskXml(taskName, sqlstring)
  job_xml = genJobXml('arbitriary_job', '9', "", task_xml)

  headers = {
    'Content-Type' => 'application/xml',
    'Content-MD5' => Digest::MD5.hexdigest(job_xml),
    # Content-Length counts bytes, not characters (matters for non-ASCII SQL).
    'Content-Length' => job_xml.bytesize.to_s
  }
  params = {}

  url = "/projects/" + @mProjectName + "/instances"
  conn = HttpConnection.new(@mOdpsConfig, headers, params, url, 'POST', job_xml, true)

  res = conn.getResponse
  if res.code != '200'
    raise "SQL task " + taskName.to_s + " failed with error " + res.code.to_s
  end

  # NOTE(review): assumes res.to_hash follows Net::HTTPHeader#to_hash
  # (lowercase keys, array values), as waitForSQLComplete's use of
  # ['location'][0] suggests; the capitalized key is kept as a fallback.
  resHeaders = res.to_hash
  contentLength = Array(resHeaders['content-length'] || resHeaders['Content-Length']).first
  if contentLength.to_s != "0" && !res.body.to_s.include?("Instance")
    raise res.body.to_s
  end

  waitForSQLComplete(res)
end

#waitForSQLComplete(res) ⇒ Object



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# File 'lib/fluent/plugin/odps/odps_table.rb', line 206

# Polls an instance until its status is 'Terminated', then checks every task
# of that instance and raises if one did not succeed.
#
# res - response of the instance-creation request; the last path segment of
#       its first 'location' header value is taken as the instance id.
#
# The instance is queried with the 'taskstatus' parameter. While the status
# is anything other than 'Terminated' the method sleeps 5 seconds between
# polls, so an unexpected status value cannot turn into a tight request loop.
#
# Raises RuntimeError with the text returned by getTaskResult for the first
# task whose status is not 'Success'.
def waitForSQLComplete(res)
  instanceId = res.to_hash['location'][0].split('/')[-1]
  instanceurl = "/projects/" + @mProjectName + "/instances" + "/" + instanceId

  headers = {}
  params = { 'taskstatus' => "" }
  doc = nil

  loop do
    conn = HttpConnection.new(@mOdpsConfig, headers, params, instanceurl, 'GET', "", true)
    doc = REXML::Document.new(conn.getResponse.body.to_s)
    break if doc.root.elements["Status"].text == 'Terminated'
    # Running, Suspended or any other non-final status: wait before re-polling.
    sleep(5)
  end

  doc.root.elements.each('Tasks/Task') { |e|
    status = e.elements['Status'].text
    name = e.elements['Name'].text
    if status.to_s != 'Success'
      # to_s keeps raise from failing with TypeError on a non-String result.
      raise getTaskResult(instanceurl, name.to_s).to_s
    end
  }
end