Class: String

Inherits:
Object
  • Object
show all
Defined in:
lib/runicode.rb,
lib/runicode/utf8.rb

Overview

:nodoc:

Instance Method Summary collapse

Instance Method Details

#codepoints ⇒ Object (also known as: chars)

Collects the codepoints as an array.



31
32
33
34
35
# File 'lib/runicode.rb', line 31

# Gathers everything #each_codepoint yields into a fresh Array.
#
# @return [Array] the yielded codepoints, in the order they were produced
def codepoints
  [].tap do |collected|
    each_codepoint { |codepoint| collected << codepoint }
  end
end

#each_char ⇒ Object

Yields each character of the string to the given block, one character per codepoint.

Raises:

  • (LocalJumpError)


25
26
27
28
# File 'lib/runicode.rb', line 25

# Yields each character of the string to the block as a one-character String.
#
# Characters are taken from #each_codepoint and encoded with Array#pack('U');
# Integer#chr without an encoding argument raises RangeError for codepoints
# above 255, so it cannot be used for non-ASCII text.
# NOTE(review): assumes #each_codepoint yields Integer codepoints -- confirm.
#
# @yield [char] each character, encoded as UTF-8
# @raise [LocalJumpError] if no block is given
def each_char
  raise LocalJumpError, 'no block given' unless block_given?

  each_codepoint { |cp| yield [cp].pack('U') }
end

#each_codepoint ⇒ Object

Passes each character in str to the given block as an Integer. A rewrite of codepoints.

Raises:

  • (LocalJumpError)


50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/runicode/utf8.rb', line 50

# Walks the receiver's bytes as UTF-8 and yields one decoded value per
# character.
#
# Every complete byte sequence is checked against
# RUnicode::UTF8::INVALID_BYTES, passed to RUnicode::UTF8.bytes_to_char, and
# the result is yielded (presumably an Integer codepoint -- confirm against
# bytes_to_char).
#
# A sequence is flushed when the next lead byte arrives, or at the end of the
# string. The end-of-string flush happens only after confirming that no
# continuation bytes are still owed, so a truncated trailing sequence raises
# instead of being decoded and yielded.
#
# @yield [codepoint] the value returned by RUnicode::UTF8.bytes_to_char
# @return [nil]
# @raise [LocalJumpError] if no block is given
# @raise [IOError] if the bytes are not well-formed UTF-8
def each_codepoint
  raise LocalJumpError, 'no block given' unless block_given?

  # Validates one finished sequence and hands its decoded value to the caller.
  emit = lambda do |bytes|
    if bytes.any? { |byte| RUnicode::UTF8::INVALID_BYTES.include?(byte) }
      raise IOError, 'ill-formed UTF-8'
    end

    yield RUnicode::UTF8.bytes_to_char(bytes)
  end

  pending = 0    # continuation bytes still expected for the current sequence
  sequence = nil # bytes gathered so far for the current sequence

  to_s.each_byte do |b|
    if pending.zero?
      # A new sequence starts here, so the previous one (if any) is complete.
      emit.call(sequence) if sequence
      sequence = [b]
      # The lead byte's high bits announce how many continuation bytes follow.
      pending =
        case b
        when 0x00..0x7F then 0 # 0xxxxxxx
        when 0xC0..0xDF then 1 # 110xxxxx
        when 0xE0..0xEF then 2 # 1110xxxx
        when 0xF0..0xF7 then 3 # 11110xxx
        else raise IOError, 'ill-formed UTF-8'
        end
    else
      # Continuation bytes must look like 10xxxxxx.
      raise IOError, 'ill-formed UTF-8' unless b >> 6 == 0b10

      sequence << b
      pending -= 1
    end
  end

  # Reject a string that ends mid-sequence before decoding the last bytes.
  raise IOError, 'ill-formed UTF-8' unless pending.zero?

  emit.call(sequence) if sequence
  nil
end