Class: Embulk::Parser::QueryString

Inherits:
ParserPlugin
  • Object
show all
Defined in:
lib/embulk/parser/query_string.rb

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.parse(line, options = {}) ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/embulk/parser/query_string.rb', line 67

# Parses one input line as a URL query string.
#
# The line is optionally narrowed to the first capture group of
# +options[:capture]+, stripped of surrounding whitespace, and stripped of
# one leading / one trailing quote character before it is handed to
# Addressable. The caller's string is never modified.
#
# @param line [String] raw line to parse
# @param options [Hash] normalization switches
# @option options [String, Regexp] :capture pattern whose first capture
#   group holds the query string
# @option options [Boolean] :strip_whitespace strip leading/trailing
#   whitespace
# @option options [Boolean] :strip_quote drop one leading and one trailing
#   single or double quote
# @return [Hash, nil] the result of +query_values(Hash)+; nil when the line
#   is empty after normalization, fails +valid_query_string?+, or
#   Addressable raises ArgumentError (the latter is logged as a warning)
def self.parse(line, options = {})
  if options[:capture]
    # A failed match (or a pattern without a group) yields nil here, which
    # is treated like an empty line.
    # TODO: detect incorrect regexp given
    line = line.match(options[:capture]).to_a[1] || ""
  end

  # Non-destructive strip: the argument may be frozen or reused by the
  # caller, so it must not be modified in place.
  line = line.strip if options[:strip_whitespace]

  if options[:strip_quote]
    # `.` does not match a newline, so the pattern can fail to match; keep
    # the line unchanged in that case instead of replacing it with nil.
    line = line[/\A(?:["'])?(.*?)(?:["'])?\z/, 1] || line
  end

  # Checked after normalization so that lines made only of whitespace or
  # quotes are skipped the same way as empty lines.
  return if line == ""

  begin
    uri = Addressable::URI.parse("?#{line}")
    valid_query_string?(uri.query) ? uri.query_values(Hash) : nil
  rescue ArgumentError
    Embulk.logger.warn "Failed parse: #{line}"
    nil
  end
end

.transaction(config) {|task, columns| ... } ⇒ Object

Yields:

  • (task, columns)


9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/embulk/parser/query_string.rb', line 9

# Builds the task hash and the column list from the plugin configuration
# and yields both to the given block.
#
# @param config [#load_config, #param] plugin configuration
# @yield [task, columns]
# @yieldparam task [Hash] "decoder", "strip_quote", "strip_whitespace" and
#   "capture" settings
# @yieldparam columns [Array<Column>] one Column per entry of the "columns"
#   setting, built from its "name" and "type" keys
# @return [Object] whatever the block returns
def self.transaction(config, &control)
  java_decoder_task = config.load_config(Java::LineDecoder::DecoderTask)

  task = {}
  task["decoder"] = DataSource.from_java(java_decoder_task.dump)
  task["strip_quote"] = config.param("strip_quote", :bool, default: true)
  task["strip_whitespace"] = config.param("strip_whitespace", :bool, default: true)
  task["capture"] = config.param("capture", :string, default: nil)

  definitions = config.param("columns", :array, default: [])
  columns = definitions.map do |definition|
    # The first argument (the column index) is left nil, as in the
    # configuration-driven construction this method has always used.
    Column.new(nil, definition["name"], definition["type"].to_sym)
  end

  yield(task, columns)
end

.valid_query_string?(qs) ⇒ Boolean

Returns:

  • (Boolean)


53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/embulk/parser/query_string.rb', line 53

# Checks that a query string contains neither whitespace nor any character
# outside the printable ASCII range (0x20-0x7e). A warning naming the
# offending string is logged when the check fails.
#
# @param qs [String] query string to inspect
# @return [Boolean] true when the string passes both checks
def self.valid_query_string?(qs)
  # The whitespace check runs first, so a string failing both checks is
  # reported as containing an unescaped space.
  warning =
    if qs.match(/[\s]/)
      "'#{qs}' contains unescaped space"
    elsif qs.match(/[^\x20-\x7e]/)
      "'#{qs}' contains non-ascii character (maybe unescaped)"
    end

  if warning
    Embulk.logger.warn warning
    return false
  end

  true
end

Instance Method Details

#init ⇒ Object



31
32
33
34
35
36
37
38
39
# File 'lib/embulk/parser/query_string.rb', line 31

# Copies the parsing switches out of the task into @options (symbol keys)
# and loads the decoder settings into @decoder.
#
# NOTE(review): `task` is not defined in this file; presumably it is
# supplied by the ParserPlugin base class - confirm.
#
# @return [Object] the loaded decoder task
def init
  @options = %w[strip_quote strip_whitespace capture].each_with_object({}) do |key, collected|
    collected[key.to_sym] = task[key]
  end

  decoder_source = task.param("decoder", :hash)
  @decoder = decoder_source.load_task(Java::LineDecoder::DecoderTask)
end

#run(file_input) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
# File 'lib/embulk/parser/query_string.rb', line 41

# Reads every line of every input file through a Java line decoder, passes
# each line to process_line, and finishes the page builder at the end.
#
# @param file_input [#to_java] input handed to the line decoder
# @return [Object] the result of page_builder.finish
def run(file_input)
  line_decoder = Java::LineDecoder.new(file_input.to_java, @decoder)

  # Outer loop advances file by file; inner loop drains the current file
  # until poll returns a falsy value.
  while line_decoder.nextFile
    while (text = line_decoder.poll)
      process_line(text)
    end
  end

  page_builder.finish
end