Class: String

Inherits:
Object
  • Object
show all
Defined in:
lib/hebrew.rb

Overview

extend String class

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.is_codepoint_nikkud_cp1255(cp) ⇒ Object



93
94
95
96
# File 'lib/hebrew.rb', line 93

# Tells whether a Windows-1255 (CP1255) code point is a nikkud character.
#
# @param cp [Integer] the code point to classify
# @return [Boolean] true for 192..204 and for 209 and 210, false otherwise
def self.is_codepoint_nikkud_cp1255(cp)
  # Plain comparisons are deliberate: NIKKUD_CP1255.include?(cp) would be
  # cleaner, but much slower.
  in_contiguous_run = cp > 191 && cp < 205
  in_contiguous_run || cp == 209 || cp == 210
end

.is_codepoint_nikkud_utf8(cp) ⇒ Object

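Returns true if the given code point is a nikkud character in UTF-8 (U+05B0–U+05BC, U+05C1 or U+05C2). A NIKKUD_UTF8.include?(cp) lookup would be cleaner, but much slower.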



97
98
99
100
# File 'lib/hebrew.rb', line 97

# Tells whether a Unicode code point (as found in a UTF-8 string) is a
# nikkud character.
#
# @param cp [Integer] the code point to classify
# @return [Boolean] true for 0x05b0..0x05bc and for 0x05c1 and 0x05c2,
#   false otherwise
def self.is_codepoint_nikkud_utf8(cp)
  # Plain comparisons are deliberate: NIKKUD_UTF8.include?(cp) would be
  # cleaner, but much slower.
  in_contiguous_run = cp > 0x05af && cp < 0x05bd
  in_contiguous_run || cp == 0x05c1 || cp == 0x05c2
end

.is_final_by_encoding(c, encoding) ⇒ Object

this will return true if the first parameter is a final letter in the encoding of the second parameter



112
113
114
115
116
117
118
119
# File 'lib/hebrew.rb', line 112

# Tells whether +c+ is a final letter in the given encoding, by looking it up
# in that encoding's list of finals.
#
# @param c [Object] the value looked up in the per-encoding finals list
# @param encoding [Encoding] selects which finals list is consulted
# @return [Boolean, nil] the lookup result, or nil when +encoding+ is neither
#   UTF-8 nor Windows-1255
def self.is_final_by_encoding(c, encoding)
  case encoding
  when Encoding::UTF_8
    # NOTE(review): this constant is spelled "FIANLS" while the CP1255 one is
    # "FINALS" — presumably it matches its definition elsewhere; confirm
    # before renaming.
    FIANLS_UTF8.include?(c)
  when Encoding::WINDOWS_1255, Encoding::CP1255
    # A comma-separated list tests every candidate; the former
    # `A || B` expression collapsed to just `A` before the comparison.
    FINALS_CP1255.include?(c)
  end
end

.is_nikkud_by_encoding(c, encoding) ⇒ Object

this will return true if the first parameter is a nikkud character in the encoding of the second parameter



102
103
104
105
106
107
108
109
110
# File 'lib/hebrew.rb', line 102

# Tells whether the first character of +c+ is a nikkud character, using the
# code-point predicate that belongs to +encoding+.
#
# The code point is read from +c+ itself (via String#codepoints), so +c+ is
# expected to actually be encoded in +encoding+.
#
# @param c [String] the character to classify; only its first code point is
#   examined
# @param encoding [Encoding] selects the UTF-8 or Windows-1255 predicate
# @return [Boolean, nil] true/false for supported encodings (false for an
#   empty +c+), nil when +encoding+ is neither UTF-8 nor Windows-1255
def self.is_nikkud_by_encoding(c, encoding)
  case encoding
  when Encoding::UTF_8
    cp = c.codepoints.first
    # An empty string has no code point; it is simply not a nikkud character.
    cp ? is_codepoint_nikkud_utf8(cp) : false
  when Encoding::WINDOWS_1255, Encoding::CP1255
    # A comma-separated list tests every candidate; the former
    # `A || B` expression collapsed to just `A` before the comparison.
    cp = c.codepoints.first
    cp ? is_codepoint_nikkud_cp1255(cp) : false
  # TODO: add Mac encoding?
  end
end

Instance Method Details

#any_hebrew? ⇒ Boolean

this will return true if the string contains any Hebrew character (short circuit)

Returns:

  • (Boolean)


49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/hebrew.rb', line 49

# Tells whether the string contains at least one Hebrew code point, stopping
# at the first match.
#
# Only UTF-8 and Windows-1255 strings are inspected; a string in any other
# encoding is reported as containing no Hebrew.
#
# @return [Boolean]
def any_hebrew?
  case encoding
  when Encoding::UTF_8
    each_codepoint.any? { |cp| is_hebrew_codepoint_utf8(cp) }
  when Encoding::WINDOWS_1255, Encoding::CP1255
    # A comma-separated list tests every candidate; the former
    # `A || B` expression collapsed to just `A` before the comparison.
    each_codepoint.any? { |cp| is_hebrew_codepoint_cp1255(cp) }
  else
    false
  end
end

#any_nikkud? ⇒ Boolean

Returns:

  • (Boolean)


66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/hebrew.rb', line 66

# Tells whether the string contains at least one nikkud code point, stopping
# at the first match.
#
# Only UTF-8 and Windows-1255 strings are inspected. A string in any other
# encoding yields false without being scanned; previously that case
# dispatched a zero-argument instance method on the String class with one
# argument, which raised for every non-empty string.
#
# @return [Boolean]
def any_nikkud?
  predicate = case encoding
              when Encoding::UTF_8
                :is_codepoint_nikkud_utf8
              when Encoding::WINDOWS_1255, Encoding::CP1255
                :is_codepoint_nikkud_cp1255
              end
  # No predicate means the encoding is unsupported: nothing can match.
  return false unless predicate

  each_codepoint.any? { |cp| String.public_send(predicate, cp) }
end

#falsehood ⇒ Object



62
63
64
# File 'lib/hebrew.rb', line 62

# Constant predicate: takes no arguments and always returns false.
#
# @return [false]
def falsehood
  false
end

#is_hebrew_codepoint_cp1255(cp) ⇒ Object



79
80
81
# File 'lib/hebrew.rb', line 79

# Tells whether a Windows-1255 (CP1255) code point is a Hebrew character.
#
# Besides the point/mark bytes that were already recognised, this accepts the
# Hebrew letters themselves (alef..tav, final forms included), which occupy
# 0xE0..0xFA in Windows-1255. Without them, unpointed Hebrew text was never
# detected.
#
# @param cp [Integer] the code point to classify
# @return [Boolean]
def is_hebrew_codepoint_cp1255(cp)
  # Points and marks, exactly as before: 192..201, 203, 204, 209 and 210.
  return true if (cp > 191 && cp < 202) || [203, 204, 209, 210].include?(cp)

  # Letters: 224 (0xE0, alef) through 250 (0xFA, tav).
  cp > 223 && cp < 251
end

#is_hebrew_codepoint_utf8(cp) ⇒ Object



82
83
84
# File 'lib/hebrew.rb', line 82

# Tells whether a code point lies inside the inclusive range
# HEB_UTF8_START..HEB_UTF8_END.
#
# @param cp [Integer] the code point to classify
# @return [Boolean]
def is_hebrew_codepoint_utf8(cp)
  cp.between?(HEB_UTF8_START, HEB_UTF8_END)
end

#is_nikkud(c) ⇒ Object

this will return true if the parameter is a nikkud character



89
90
91
# File 'lib/hebrew.rb', line 89

# Tells whether +c+ is a nikkud character, judged in this string's own
# encoding. The work is done by the class-level is_nikkud_by_encoding.
#
# @param c [String] the character to classify
# @return [Object] whatever is_nikkud_by_encoding returns for this encoding
def is_nikkud(c)
  own_encoding = encoding
  self.class.is_nikkud_by_encoding(c, own_encoding)
end

#strip_nikkud ⇒ Object

this will return the string, stripped of any Hebrew nikkud characters



22
23
24
25
26
27
28
29
# File 'lib/hebrew.rb', line 22

# Returns the string stripped of nikkud characters, delegating to the
# stripper that matches this string's encoding.
#
# @return [String, nil] the result of strip_nikkud_utf8 or
#   strip_nikkud_cp1255; nil when the encoding is neither UTF-8 nor
#   Windows-1255
def strip_nikkud
  case encoding
  when Encoding::UTF_8
    strip_nikkud_utf8
  when Encoding::WINDOWS_1255, Encoding::CP1255
    # A comma-separated list tests every candidate; the former
    # `A || B` expression collapsed to just `A` before the comparison.
    strip_nikkud_cp1255
  end
end

#strip_nikkud_cp1255 ⇒ Object



30
31
32
33
34
35
36
37
38
# File 'lib/hebrew.rb', line 30

# Builds a new Windows-1255 string from this one, leaving out every code
# point that is_codepoint_nikkud_cp1255 classifies as nikkud. The receiver is
# not modified.
#
# @return [String] a new string tagged with the 'windows-1255' encoding
def strip_nikkud_cp1255
  stripped = ''.dup.force_encoding('windows-1255')
  each_codepoint.each_with_object(stripped) do |cp, out|
    next if self.class.is_codepoint_nikkud_cp1255(cp)

    # Each kept code point is turned back into a one-character string.
    out << cp.chr(Encoding::CP1255)
  end
end

#strip_nikkud_utf8 ⇒ Object



39
40
41
42
43
44
45
46
47
# File 'lib/hebrew.rb', line 39

# Builds a new string from this one, leaving out every code point that
# is_codepoint_nikkud_utf8 classifies as nikkud. The receiver is not
# modified.
#
# @return [String] a new string made of the remaining code points
def strip_nikkud_utf8
  each_codepoint.each_with_object(''.dup) do |cp, out|
    next if self.class.is_codepoint_nikkud_utf8(cp)

    out << cp.chr(Encoding::UTF_8)
  end
end