Class: Rpdf2txt::LZW

Inherits:
Object show all
Defined in:
lib/rpdf2txt/lzw.rb

Constant Summary collapse

CLEAR =
256
EOD =
257

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(early_change = 1) ⇒ LZW

Returns a new instance of LZW.



11
12
13
14
15
# File 'lib/rpdf2txt/lzw.rb', line 11

# Builds a decoder with a pristine base table and resets its working state.
#
# early_change - value subtracted from each power-of-two boundary when
#                deciding to widen the code length (defaults to 1).
def initialize(early_change = 1)
  @early_change = early_change
  # Entries 0..255 are the single-byte strings; 256 and 257 are placeholder
  # symbols occupying the slots of the CLEAR and EOD codes.
  single_bytes = (0..255).map(&:chr)
  @__dict = single_bytes + [:clear, :eod]
  init_dictionary
end

Class Method Details

.decode(data, early_change = 1) ⇒ Object



8
9
10
# File 'lib/rpdf2txt/lzw.rb', line 8

# Convenience entry point: builds a fresh decoder and decodes +data+ with it.
#
# data         - passed through unchanged to the instance-level #decode.
# early_change - forwarded to the constructor (defaults to 1).
#
# Returns whatever the instance-level #decode returns.
def self.decode(data, early_change = 1)
  decoder = new(early_change)
  decoder.decode(data)
end

Instance Method Details

#decode(data) ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/rpdf2txt/lzw.rb', line 21

# Decodes an LZW-compressed byte string.
#
# The input is unpacked into a bit string (most significant bit first) and
# consumed one code at a time, @code_length bits per code. Decoding stops at
# the EOD code or as soon as fewer bits than one full code remain, so
# trailing pad bits are never interpreted as data.
#
# data - String holding the compressed bytes.
#
# Returns the decoded String.
# Raises RuntimeError ("Bad compressed code: N") when a code refers to a
# dictionary entry that cannot exist at that point of the stream.
def decode(data)
  bits = data.unpack('B*').first
  result = ''.dup
  # nil means "no previous code": at the start of the stream and right after
  # every CLEAR. The next code is then emitted without adding a dictionary
  # entry, because there is no prefix to combine it with.
  old_code = nil
  while bits.size >= @code_length && (code = get_next_code(bits)) && code != EOD
    if code == CLEAR
      init_dictionary
      old_code = nil
      next
    end
    string = @dictionary[code]
    if string.nil?
      # The only legal unknown code is the entry about to be created:
      # the previous string followed by its own first character.
      unless old_code && code == @dictionary.size
        raise 'Bad compressed code: %s' % code
      end
      previous = @dictionary[old_code]
      string = previous + previous[0, 1]
      update_dictionary(string)
    elsif old_code
      update_dictionary(@dictionary[old_code] + string[0, 1])
    end
    result << string
    old_code = code
  end
  result
end

#get_next_code(bits) ⇒ Object



57
58
59
# File 'lib/rpdf2txt/lzw.rb', line 57

# Removes the next code from the front of +bits+ and returns it.
#
# bits - mutable String of '0'/'1' characters; up to @code_length characters
#        are removed from its front on every call.
#
# Returns the code as an Integer, or nil when fewer than @code_length bits
# were left (the incomplete tail is discarded rather than misread as a
# small code value).
def get_next_code(bits)
  chunk = bits.slice!(0, @code_length)
  chunk.to_i(2) if chunk.size == @code_length
end

#init_dictionary ⇒ Object



16
17
18
19
20
# File 'lib/rpdf2txt/lzw.rb', line 16

# Resets the working state: 9-bit codes, a fresh copy of the base table, and
# the dictionary size at which the code length is next widened.
def init_dictionary
  @code_length = 9
  # Work on a copy so entries added while decoding never touch the base table.
  @dictionary = @__dict.dup
  @boundary = (1 << @code_length) - @early_change
end

#update_dictionary(str) ⇒ Object



60
61
62
63
64
65
66
67
# File 'lib/rpdf2txt/lzw.rb', line 60

# Appends +str+ to the dictionary and widens the code length by one bit when
# the dictionary has reached the current boundary (never beyond 12 bits).
#
# str - the entry to append.
#
# Returns +str+.
def update_dictionary(str)
  @dictionary << str
  return str unless @dictionary.size >= @boundary && @code_length < 12

  @code_length += 1
  @boundary = (1 << @code_length) - @early_change
  str
end