Class: UV::BufferedTokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/uv-rays/buffered_tokenizer.rb

Constant Summary collapse

# Encoding name the constructor falls back to when options[:encoding] is not supplied.
DEFAULT_ENCODING =
'ASCII-8BIT'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options) ⇒ BufferedTokenizer

Returns a new instance of BufferedTokenizer.

Parameters:

  • options (Hash)


24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/uv-rays/buffered_tokenizer.rb', line 24

# Builds a tokenizer and picks its extraction strategy from +options+.
#
# Options read here: :delimiter, :indicator, :msg_length, :size_limit,
# :min_length (falls back to 1), :verbose (only read when :size_limit is
# set) and :encoding (falls back to DEFAULT_ENCODING).
#
# @param options [Hash]
# @raise [ArgumentError] unless :delimiter, or both :indicator and
#   :msg_length, are supplied
def initialize(options)
    @delimiter = options[:delimiter]
    @indicator = options[:indicator]
    @msg_length = options[:msg_length]
    @size_limit = options[:size_limit]
    @min_length = options[:min_length] || 1
    if @size_limit
        @verbose = options[:verbose]
    end
    @encoding = options[:encoding] || DEFAULT_ENCODING

    # A delimiter takes precedence; otherwise both an indicator and a
    # message length are required for length-based extraction.
    strategy =
        if @delimiter
            :delimiter_extract
        elsif @indicator && @msg_length
            :length_extract
        else
            raise ArgumentError, 'no delimiter provided'
        end
    @extract_method = method(strategy)

    init_buffer
end

Instance Attribute Details

#delimiter ⇒ Object

Returns the value of attribute delimiter.



21
22
23
# File 'lib/uv-rays/buffered_tokenizer.rb', line 21

# Reader for @delimiter, which the constructor assigns from options[:delimiter].
#
# @return [Object] the current value of @delimiter
def delimiter
  @delimiter
end

#indicator ⇒ Object

Returns the value of attribute indicator.



21
22
23
# File 'lib/uv-rays/buffered_tokenizer.rb', line 21

# Reader for @indicator, which the constructor assigns from options[:indicator].
#
# @return [Object] the current value of @indicator
def indicator
  @indicator
end

#size_limit ⇒ Object

Returns the value of attribute size_limit.



21
22
23
# File 'lib/uv-rays/buffered_tokenizer.rb', line 21

# Reader for @size_limit, which the constructor assigns from options[:size_limit].
#
# @return [Object] the current value of @size_limit
def size_limit
  @size_limit
end

#verbose ⇒ Object

Returns the value of attribute verbose.



21
22
23
# File 'lib/uv-rays/buffered_tokenizer.rb', line 21

# Reader for @verbose. The constructor only assigns it (from
# options[:verbose]) when a :size_limit option was supplied.
#
# @return [Object] the current value of @verbose
def verbose
  @verbose
end

Instance Method Details

#empty? ⇒ Boolean

Returns:

  • (Boolean)


71
72
73
# File 'lib/uv-rays/buffered_tokenizer.rb', line 71

# Reports whether the input buffer is empty by delegating to @input.empty?.
#
# @return [Boolean]
def empty?
    @input.empty?
end

#extract(data) ⇒ Object

Extract takes an arbitrary string of input data and returns an array of tokenized entities, provided there were any available to extract.

Examples:


tokenizer.extract(data).
    map { |entity| Decode(entity) }.each { ... }

Parameters:

  • data (String)


53
54
55
56
57
58
# File 'lib/uv-rays/buffered_tokenizer.rb', line 53

# Appends +data+ to the input buffer and runs the extraction strategy that
# was selected in the constructor.
#
# The chunk is tagged with the tokenizer's encoding before it is buffered.
# Unfrozen strings are re-tagged in place, exactly as before; frozen
# strings are duplicated first so they no longer raise FrozenError.
#
# @param data [String] raw input chunk
# @return [Object] whatever the selected extraction strategy returns
#   (documented as an array of tokenized entities)
def extract(data)
    # force_encoding mutates its receiver, so work on a copy when the
    # caller handed over a frozen string.
    data = data.dup if data.frozen?
    data.force_encoding(@encoding)
    @input << data

    @extract_method.call
end

#flush ⇒ String

Flush the contents of the input buffer, i.e. return the input buffer even though a token has not yet been encountered.

Returns:

  • (String)


64
65
66
67
68
# File 'lib/uv-rays/buffered_tokenizer.rb', line 64

# Hands back whatever is currently buffered, even though no complete token
# has been seen, and resets the tokenizer.
#
# NOTE(review): returning the pre-reset contents relies on reset replacing
# @input rather than clearing it in place -- confirm against reset.
#
# @return [String] the object @input referred to before reset was called
def flush
    # tap evaluates @input first, runs reset, then returns that same object.
    @input.tap { reset }
end