Class: UV::BufferedTokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/uv-rays/buffered_tokenizer.rb

Constant Summary collapse

DEFAULT_ENCODING =
'ASCII-8BIT'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options) ⇒ BufferedTokenizer

Returns a new instance of BufferedTokenizer.

Parameters:

  • options (Hash)


25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/uv-rays/buffered_tokenizer.rb', line 25

# Builds a tokenizer from a hash of settings.
#
# Keys read from +options+:
# * +:delimiter+  - when truthy, delimiter-based extraction is selected
# * +:indicator+ and +:msg_length+ - when both are truthy (and no delimiter
#   was given), length-based extraction is selected
# * +:size_limit+ - stored as given; also gates +:verbose+
# * +:min_length+ - falls back to 1 when nil or false
# * +:verbose+    - only read when +:size_limit+ is truthy
# * +:encoding+   - falls back to DEFAULT_ENCODING when nil or false
#
# @param options [Hash]
# @raise [ArgumentError] when neither a delimiter nor an
#   indicator/msg_length pair is supplied
def initialize(options)
    @delimiter  = options[:delimiter]
    @indicator  = options[:indicator]
    @msg_length = options[:msg_length]
    @size_limit = options[:size_limit]

    @min_length = options[:min_length]
    @min_length ||= 1

    # @verbose stays unassigned when no size limit is given.
    if @size_limit
        @verbose = options[:verbose]
    end

    @encoding = options[:encoding]
    @encoding ||= DEFAULT_ENCODING

    # A delimiter takes precedence over the indicator/msg_length pair.
    strategy =
        if @delimiter
            :delimiter_extract
        elsif @indicator && @msg_length
            :length_extract
        end
    raise ArgumentError, 'no delimiter provided' unless strategy

    @extract_method = method(strategy)
    init_buffer
end

Instance Attribute Details

#delimiter ⇒ Object

Returns the value of attribute delimiter.



22
23
24
# File 'lib/uv-rays/buffered_tokenizer.rb', line 22

# Reader for the +@delimiter+ instance variable.
#
# The constructor assigns it from +options[:delimiter]+; a truthy value
# there selects delimiter-based extraction.
#
# @return [Object] the current value of +@delimiter+
def delimiter
  @delimiter
end

#indicator ⇒ Object

Returns the value of attribute indicator.



22
23
24
# File 'lib/uv-rays/buffered_tokenizer.rb', line 22

# Reader for the +@indicator+ instance variable.
#
# The constructor assigns it from +options[:indicator]+; together with a
# truthy +:msg_length+ (and no delimiter) it selects length-based extraction.
#
# @return [Object] the current value of +@indicator+
def indicator
  @indicator
end

#size_limit ⇒ Object

Returns the value of attribute size_limit.



22
23
24
# File 'lib/uv-rays/buffered_tokenizer.rb', line 22

# Reader for the +@size_limit+ instance variable.
#
# The constructor assigns it from +options[:size_limit]+ and only stores
# the +:verbose+ option when this value is truthy.
# NOTE(review): what exactly the limit bounds is decided by the extraction
# methods, which are not visible here - confirm before relying on it.
#
# @return [Object] the current value of +@size_limit+
def size_limit
  @size_limit
end

#verbose ⇒ Object

Returns the value of attribute verbose.



22
23
24
# File 'lib/uv-rays/buffered_tokenizer.rb', line 22

# Reader for the +@verbose+ instance variable.
#
# The constructor assigns it from +options[:verbose]+ only when a truthy
# +:size_limit+ was supplied; otherwise the constructor never sets it and
# this returns nil unless it has been assigned elsewhere.
#
# @return [Object, nil] the current value of +@verbose+
def verbose
  @verbose
end

Instance Method Details

#bytesize ⇒ Integer

Returns:

  • (Integer)


77
78
79
# File 'lib/uv-rays/buffered_tokenizer.rb', line 77

# Reports the size of the input buffer by delegating to +@input.bytesize+.
#
# +@input+ is the buffer that #extract appends incoming data to.
#
# @return [Integer]
def bytesize
    @input.bytesize
end

#empty? ⇒ Boolean

Returns:

  • (Boolean)


72
73
74
# File 'lib/uv-rays/buffered_tokenizer.rb', line 72

# Tells whether the input buffer is empty by delegating to +@input.empty?+.
#
# +@input+ is the buffer that #extract appends incoming data to.
#
# @return [Boolean]
def empty?
    @input.empty?
end

#extract(data) ⇒ Object

Extract takes an arbitrary string of input data and returns an array of tokenized entities, provided there were any available to extract.

Examples:


tokenizer.extract(data).
    map { |entity| Decode(entity) }.each { ... }

Parameters:

  • data (String)


54
55
56
57
58
59
# File 'lib/uv-rays/buffered_tokenizer.rb', line 54

# Appends +data+ to the input buffer and runs the extraction strategy that
# was selected in the constructor.
#
# The data is re-tagged with the tokenizer's encoding before it is appended.
# Unfrozen strings are re-tagged in place, exactly as before; frozen strings
# are copied first so the call no longer raises FrozenError.
#
# @param data [String] raw input to buffer
# @return [Object] whatever the selected extraction method returns
#   (documented for this class as an array of tokenized entities)
def extract(data)
    # force_encoding mutates its receiver and rejects frozen strings, so
    # work on a private copy when the caller's string cannot be modified.
    data = data.dup if data.frozen?
    data.force_encoding(@encoding)
    @input << data

    @extract_method.call
end

#flush ⇒ String

Flush the contents of the input buffer, i.e. return the input buffer even though a token has not yet been encountered.

Returns:

  • (String)


65
66
67
68
69
# File 'lib/uv-rays/buffered_tokenizer.rb', line 65

# Hands back whatever is currently buffered and calls +reset+, regardless
# of whether a complete token has been seen yet.
#
# @return [String] the object +@input+ referred to when flush was called
def flush
    # tap yields the buffer, runs reset for its side effect, then returns
    # that same buffer object.
    @input.tap { reset }
end