Class: Embulk::Filter::GoogleVisionApi

Inherits:
FilterPlugin
  • Object
show all
Defined in:
lib/embulk/filter/google_vision_api.rb

Constant Summary collapse

ENDPOINT_PREFIX =
"https://vision.googleapis.com/v1/images:annotate"

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.transaction(config, in_schema) {|task, out_columns| ... } ⇒ Object

Yields:

  • (task, out_columns)


15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/embulk/filter/google_vision_api.rb', line 15

# Reads the plugin options from +config+, assembles the task hash, and yields
# it together with the output columns: the incoming schema followed by one
# :json column named after the "out_key_name" option.
#
# @param config [#param] plugin configuration; queried once per option
# @param in_schema [Array] incoming columns
# @yield [task, out_columns]
# @return [Object] the value returned by the block
def self.transaction(config, in_schema, &control)
  out_key_name = config.param("out_key_name", :string)
  image_path_key_name = config.param("image_path_key_name", :string)
  features = config.param("features", :array)
  delay = config.param("delay", :integer, default: 0)
  image_num_per_request = config.param("image_num_per_request", :integer, default: 16)
  # The GOOGLE_API_KEY environment variable is passed as the default value.
  google_api_key = config.param("google_api_key", :string, default: ENV['GOOGLE_API_KEY'])

  task = {
    "out_key_name" => out_key_name,
    "image_path_key_name" => image_path_key_name,
    "features" => features,
    "delay" => delay,
    "image_num_per_request" => image_num_per_request,
    "google_api_key" => google_api_key,
  }

  json_column = Column.new(nil, out_key_name, :json)

  yield(task, in_schema + [json_column])
end

Instance Method Details

#add(page) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/embulk/filter/google_vision_api.rb', line 50

# Annotates every record of +page+ with its Google Vision API result.
#
# Records are turned into hashes keyed by the input column names and sent in
# batches of @image_num_per_request images per HTTP request. Each record is
# then added to page_builder with the matching API response appended as the
# last value. After every batch the method sleeps for @delay.
#
# @param page [Enumerable<Array>] records; values are zipped with in_schema.names
# @raise [RuntimeError] if the API reply does not contain a "responses" array
def add(page)
  column_names = in_schema.names
  batches = page.map { |record| Hash[column_names.zip(record)] }
                .each_slice(@image_num_per_request)

  batches.each do |records|
    requests = records.map do |record|
      {
        image: { content: Base64.encode64(get_image_body(record)) },
        features: @features
      }
    end
    @post.body = { requests: requests }.to_json
    Embulk.logger.debug "request body => #{@post.body}"

    response_hash = @http.start { |h| JSON.parse(h.request(@post).body) }

    # A request-level failure (for example a top-level "error" object) has no
    # per-image "responses"; fail with the reply instead of a NoMethodError on nil.
    responses = response_hash['responses'] if response_hash.is_a?(Hash)
    unless responses.is_a?(Array)
      raise "Google Vision API returned no responses: #{response_hash}"
    end

    records.each_with_index do |record, i|
      recognized = responses[i]
      # Per-image errors are logged but the row is still emitted with the error payload.
      Embulk.logger.warn "Error image => [#{record[@image_path_key_name]}] #{recognized}" if recognized.key?("error")
      page_builder.add(record.values + [recognized])
    end

    sleep @delay
  end
end

#close ⇒ Object



47
48
# File 'lib/embulk/filter/google_vision_api.rb', line 47

# Intentionally does nothing and returns nil.
def close
  nil
end

#finish ⇒ Object



87
88
89
# File 'lib/embulk/filter/google_vision_api.rb', line 87

# Delegates to page_builder.finish and returns its result.
def finish
  builder = page_builder
  builder.finish
end

#init ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/embulk/filter/google_vision_api.rb', line 34

# Prepares the HTTPS client and the reusable JSON POST request for the
# endpoint, and copies the settings from +task+ into instance variables.
#
# The API key from task['google_api_key'] is placed in the request query string.
def init
  @uri = URI.parse("#{ENDPOINT_PREFIX}?key=#{task['google_api_key']}")
  @http = Net::HTTP.new(@uri.host, @uri.port)
  @http.use_ssl = true
  # Verify the server certificate: the request URI carries the API key and the
  # body carries image data, so an unverified connection would expose both to
  # a man-in-the-middle.
  @http.verify_mode = OpenSSL::SSL::VERIFY_PEER
  @post = Net::HTTP::Post.new(@uri.request_uri, { 'Content-Type' => 'application/json' })
  @image_path_key_name = task['image_path_key_name']
  @out_key_name = task['out_key_name']
  @delay = task['delay']
  @image_num_per_request = task['image_num_per_request']
  @features = task['features']
end