Class: String
- Inherits:
-
Object
- Object
- String
- Defined in:
- lib/hebrew.rb
Overview
Extends the core String class with helpers for detecting and stripping Hebrew characters and nikkud (vowel points).
Class Method Summary collapse
- .is_codepoint_nikkud_cp1255(cp) ⇒ Object
-
.is_codepoint_nikkud_utf8(cp) ⇒ Object
this will return true if the given UTF-8 codepoint is a nikkud character. (NIKKUD_UTF8.include?(cp) would be cleaner, but much slower.)
-
.is_final_by_encoding(c, encoding) ⇒ Object
this will return true if the first parameter is a final letter in the encoding of the second parameter.
-
.is_nikkud_by_encoding(c, encoding) ⇒ Object
this will return true if the first parameter is a nikkud character in the encoding of the second parameter.
Instance Method Summary collapse
-
#any_hebrew? ⇒ Boolean
this will return true if the string contains any Hebrew character (short circuit).
- #any_nikkud? ⇒ Boolean
- #falsehood ⇒ Object
- #is_hebrew_codepoint_cp1255(cp) ⇒ Object
- #is_hebrew_codepoint_utf8(cp) ⇒ Object
-
#is_nikkud(c) ⇒ Object
this will return true if the parameter is a nikkud character.
-
#strip_nikkud ⇒ Object
this will return the string, stripped of any Hebrew nikkud characters.
- #strip_nikkud_cp1255 ⇒ Object
- #strip_nikkud_utf8 ⇒ Object
Class Method Details
.is_codepoint_nikkud_cp1255(cp) ⇒ Object
93 94 95 96 |
# File 'lib/hebrew.rb', line 93
# Tells whether +cp+ is treated as a nikkud codepoint in the CP1255
# (Windows-1255) code page.
#
# cp - Integer codepoint (a byte value for this single-byte encoding).
#
# Returns true when cp lies strictly between 191 and 205, or is 209 or 210;
# false otherwise.
def self.is_codepoint_nikkud_cp1255(cp)
  # NIKKUD_CP1255.include?(cp) would be cleaner, but much slower
  return true if cp > 191 && cp < 205

  cp == 209 || cp == 210
end
.is_codepoint_nikkud_utf8(cp) ⇒ Object
this will return true if the given UTF-8 codepoint is a nikkud character (NIKKUD_UTF8.include?(cp) would be cleaner, but much slower)
97 98 99 100 |
# File 'lib/hebrew.rb', line 97
# Tells whether +cp+ is treated as a nikkud codepoint in Unicode/UTF-8.
#
# cp - Integer Unicode codepoint.
#
# Returns true when cp lies strictly between 0x05af and 0x05bd, or is
# 0x05c1 or 0x05c2; false otherwise.
def self.is_codepoint_nikkud_utf8(cp)
  # NIKKUD_UTF8.include?(cp) would be cleaner, but much slower
  return true if cp > 0x05af && cp < 0x05bd

  cp == 0x05c1 || cp == 0x05c2
end
.is_final_by_encoding(c, encoding) ⇒ Object
this will return true if the first parameter is a final letter in the encoding of the second parameter
112 113 114 115 116 117 118 119 |
# File 'lib/hebrew.rb', line 112
# Tells whether +c+ is listed as a final letter for the given encoding.
#
# c        - the character to look up.
# encoding - the Encoding that +c+ is expressed in.
#
# Returns the result of the list lookup for UTF-8 and Windows-1255, and nil
# for every other encoding.
def self.is_final_by_encoding(c, encoding)
  if Encoding::UTF_8 == encoding
    # NOTE(review): FIANLS_UTF8 is spelled differently from FINALS_CP1255;
    # confirm the constant is really defined under this name.
    FIANLS_UTF8.include?(c)
  elsif Encoding::WINDOWS_1255 == encoding
    # Encoding::CP1255 names the same Encoding object as WINDOWS_1255, so a
    # single comparison covers both spellings.
    FINALS_CP1255.include?(c)
  end
end
.is_nikkud_by_encoding(c, encoding) ⇒ Object
this will return true if the first parameter is a nikkud character in the encoding of the second parameter
102 103 104 105 106 107 108 109 110 |
# File 'lib/hebrew.rb', line 102
# Tells whether the first codepoint of +c+ is a nikkud character, using the
# codepoint test that belongs to +encoding+.
#
# c        - a String; only its first codepoint is examined.
# encoding - the Encoding that +c+ is expressed in.
#
# Returns the result of the matching codepoint test for UTF-8 and
# Windows-1255, and nil for every other encoding.
def self.is_nikkud_by_encoding(c, encoding)
  if Encoding::UTF_8 == encoding
    is_codepoint_nikkud_utf8(c.codepoints.first)
  elsif Encoding::WINDOWS_1255 == encoding
    # Encoding::CP1255 names the same Encoding object as WINDOWS_1255.
    is_codepoint_nikkud_cp1255(c.codepoints.first)
    # TODO: add Mac encoding?
  end
end
Instance Method Details
#any_hebrew? ⇒ Boolean
this will return true if the string contains any Hebrew character (short circuit)
49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/hebrew.rb', line 49
# Tells whether the string contains at least one Hebrew codepoint. Scanning
# stops at the first match.
#
# Returns true or false. Strings in any encoding other than UTF-8 or
# Windows-1255 always yield false.
def any_hebrew?
  own_encoding = encoding
  if Encoding::UTF_8 == own_encoding
    each_codepoint.any? { |cp| is_hebrew_codepoint_utf8(cp) }
  elsif Encoding::WINDOWS_1255 == own_encoding
    # Encoding::CP1255 names the same Encoding object as WINDOWS_1255.
    each_codepoint.any? { |cp| is_hebrew_codepoint_cp1255(cp) }
  else
    false
  end
end
#any_nikkud? ⇒ Boolean
66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/hebrew.rb', line 66
# Tells whether the string contains at least one nikkud codepoint. Scanning
# stops at the first match.
#
# Returns true or false. Strings in any encoding other than UTF-8 or
# Windows-1255 yield false without being scanned.
def any_nikkud?
  case encoding
  when Encoding::UTF_8
    each_codepoint.any? { |cp| String.is_codepoint_nikkud_utf8(cp) }
  when Encoding::WINDOWS_1255 # Encoding::CP1255 names the same Encoding object
    each_codepoint.any? { |cp| String.is_codepoint_nikkud_cp1255(cp) }
  else
    # Answer directly instead of dispatching a fallback symbol through
    # String.send: no class-level method accepting a codepoint exists for
    # that, so the dispatch raised on every non-empty string.
    false
  end
end
#falsehood ⇒ Object
62 63 64 |
# File 'lib/hebrew.rb', line 62
# Constant predicate: takes no arguments and always answers false.
def falsehood
  false
end
#is_hebrew_codepoint_cp1255(cp) ⇒ Object
79 80 81 |
# File 'lib/hebrew.rb', line 79
# Tells whether +cp+ is a Hebrew codepoint in the Windows-1255 (CP1255)
# code page.
#
# cp - Integer codepoint (a byte value for this single-byte encoding).
#
# Returns true for the Hebrew letters (224..250, i.e. 0xE0..0xFA) and for the
# point codepoints 192..201, 203, 204, 209 and 210; false otherwise.
def is_hebrew_codepoint_cp1255(cp)
  # The letters were previously not recognised at all, so unpointed Hebrew
  # text in this encoding was reported as containing no Hebrew.
  return true if cp >= 224 && cp <= 250

  (cp > 191 && cp < 202) || [203, 204, 209, 210].include?(cp)
end
#is_hebrew_codepoint_utf8(cp) ⇒ Object
82 83 84 |
# File 'lib/hebrew.rb', line 82
# Tells whether +cp+ falls inside the inclusive range
# HEB_UTF8_START..HEB_UTF8_END.
#
# cp - Integer Unicode codepoint.
#
# Returns true or false.
def is_hebrew_codepoint_utf8(cp)
  return false unless cp >= HEB_UTF8_START

  cp <= HEB_UTF8_END
end
#is_nikkud(c) ⇒ Object
this will return true if the parameter is a nikkud character
89 90 91 |
# File 'lib/hebrew.rb', line 89
# Tells whether +c+ is a nikkud character, judged in this string's own
# encoding.
#
# c - the character to test.
#
# Returns whatever the class-level is_nikkud_by_encoding returns for +c+ and
# the receiver's encoding.
def is_nikkud(c)
  own_encoding = encoding
  self.class.is_nikkud_by_encoding(c, own_encoding)
end
#strip_nikkud ⇒ Object
this will return the string, stripped of any Hebrew nikkud characters
22 23 24 25 26 27 28 29 |
# File 'lib/hebrew.rb', line 22
# Returns the string with Hebrew nikkud characters removed.
#
# UTF-8 and Windows-1255 strings are handled by the encoding-specific
# strippers. For any other encoding an unchanged copy of the string is
# returned, so callers always get a String back (previously nil fell out of
# the case expression).
def strip_nikkud
  case encoding
  when Encoding::UTF_8
    strip_nikkud_utf8
  when Encoding::WINDOWS_1255 # Encoding::CP1255 names the same Encoding object
    strip_nikkud_cp1255
  else
    dup
  end
end
#strip_nikkud_cp1255 ⇒ Object
30 31 32 33 34 35 36 37 38 |
# File 'lib/hebrew.rb', line 30
# Builds a new windows-1255 string holding every codepoint of the receiver
# that the class-level CP1255 nikkud test rejects.
#
# Returns the new String; the receiver is not modified.
def strip_nikkud_cp1255
  each_codepoint.each_with_object(''.force_encoding('windows-1255')) do |cp, kept|
    next if self.class.is_codepoint_nikkud_cp1255(cp)

    kept << cp.chr(Encoding::CP1255) # is there a neater way?
  end
end
#strip_nikkud_utf8 ⇒ Object
39 40 41 42 43 44 45 46 47 |
# File 'lib/hebrew.rb', line 39
# Builds a new string holding every codepoint of the receiver that the
# class-level UTF-8 nikkud test rejects.
#
# Returns the new String; the receiver is not modified.
def strip_nikkud_utf8
  each_codepoint.each_with_object('') do |cp, kept|
    next if self.class.is_codepoint_nikkud_utf8(cp)

    kept << cp.chr(Encoding::UTF_8)
  end
end