Module: Langusta
- Defined in:
- lib/langusta.rb,
lib/langusta/guard.rb,
lib/langusta/n_gram.rb,
lib/langusta/command.rb,
lib/langusta/version.rb,
lib/langusta/detector.rb,
lib/langusta/language.rb,
lib/langusta/inspector.rb,
lib/langusta/codepoints.rb,
lib/langusta/lang_profile.rb,
lib/langusta/regex_helper.rb,
lib/langusta/tag_extractor.rb,
lib/langusta/unicode_block.rb,
lib/langusta/detector_factory.rb,
lib/langusta/java_property_reader.rb,
lib/langusta/language_detection_facade.rb
Defined Under Namespace
Modules: Codepoints, Guard, Inspector, RegexHelper, UnicodeBlock
Classes: Command, Detector, DetectorFactory, DuplicateProfilesError, Error, JavaPropertyReader, LangProfile, Language, LanguageDetectionFacade, NGram, NoFeaturesInTextError, NoProfilesLoadedError, TagExtractor
Constant Summary
collapse
- ABSOLUTE_PATH =
File.expand_path(File.join(File.dirname(__FILE__), '..'))
- PROFILES_PATH =
File.join(ABSOLUTE_PATH, 'profiles')
- UPPERCASE_BIN =
File.join(ABSOLUTE_PATH, 'data/uppercase.bin')
- MESSAGES_PROPERTIES =
File.join(ABSOLUTE_PATH, 'data/messages.properties')
- UTF82CP_SELECTOR =
RUBY_VERSION < "1.9" ? :utf82cp_18 : :utf82cp_19
- CP2UTF8_SELECTOR =
RUBY_VERSION < "1.9" ? :cp2utf8_18 : :cp2utf8_19
- VERSION =
"0.2.4"
Class Method Summary
collapse
Class Method Details
.cp2utf8(cp_array) ⇒ Object
57
58
59
|
# File 'lib/langusta.rb', line 57
# Converts an array of code units into a string by delegating to the
# implementation named by CP2UTF8_SELECTOR.
#
# @param cp_array [Array<Integer>] values forwarded unchanged to the
#   selected converter
# @return [Object] whatever the selected converter returns
def self.cp2utf8(cp_array)
  selector = CP2UTF8_SELECTOR
  __send__(selector, cp_array)
end
|
.cp2utf8_18(cp_array) ⇒ Object
61
62
63
|
# File 'lib/langusta.rb', line 61
# Iconv-based variant of cp2utf8 (the CP2UTF8_SELECTOR constant picks it
# when RUBY_VERSION < "1.9").
#
# @param cp_array [Array<Integer>] values packed as 16-bit big-endian
#   unsigned integers before conversion
# @return [String] result of converting the packed bytes from 'ucs-2be'
#   to 'utf-8' via Iconv
def self.cp2utf8_18(cp_array)
  ucs2_bytes = cp_array.pack('n*')
  Iconv.conv('utf-8', 'ucs-2be', ucs2_bytes)
end
|
.cp2utf8_19(cp_array) ⇒ Object
65
66
67
|
# File 'lib/langusta.rb', line 65
# String-encoding based variant of cp2utf8 (the CP2UTF8_SELECTOR constant
# picks it when RUBY_VERSION is not below "1.9").
#
# @param cp_array [Array<Integer>] values packed as 16-bit big-endian
#   unsigned integers
# @return [String] the packed bytes, tagged as 'ucs-2be' and transcoded
#   to 'utf-8'
def self.cp2utf8_19(cp_array)
  ucs2_bytes = cp_array.pack('n*')
  # The packed bytes carry no text encoding yet; tag them before transcoding.
  ucs2_bytes.force_encoding('ucs-2be')
  ucs2_bytes.encode('utf-8')
end
|
.utf82cp(utf8_string) ⇒ Object
45
46
47
|
# File 'lib/langusta.rb', line 45
# Converts a string into an array of code units by delegating to the
# implementation named by UTF82CP_SELECTOR.
#
# @param utf8_string [String] text forwarded unchanged to the selected
#   converter
# @return [Object] whatever the selected converter returns
def self.utf82cp(utf8_string)
  selector = UTF82CP_SELECTOR
  __send__(selector, utf8_string)
end
|
.utf82cp_18(utf8_string) ⇒ Object
49
50
51
|
# File 'lib/langusta.rb', line 49
# Iconv-based variant of utf82cp (the UTF82CP_SELECTOR constant picks it
# when RUBY_VERSION < "1.9").
#
# @param utf8_string [String] text converted from 'utf-8' to 'ucs-2be'
#   via Iconv
# @return [Array<Integer>] the converted bytes read back as 16-bit
#   big-endian unsigned integers
def self.utf82cp_18(utf8_string)
  ucs2_bytes = Iconv.conv('ucs-2be', 'utf-8', utf8_string)
  ucs2_bytes.unpack('n*')
end
|
.utf82cp_19(utf8_string) ⇒ Object
53
54
55
|
# File 'lib/langusta.rb', line 53
# String-encoding based variant of utf82cp (the UTF82CP_SELECTOR constant
# picks it when RUBY_VERSION is not below "1.9").
#
# The input bytes are always interpreted as UTF-8, regardless of the
# encoding tag on the string. This mirrors utf82cp_18, which names 'utf-8'
# as the source encoding explicitly, and lets strings that hold UTF-8 bytes
# but are tagged as binary/US-ASCII be converted instead of raising.
#
# @param utf8_string [String] text whose bytes are UTF-8
# @return [Array<Integer>] the 'ucs-2be' transcoding of the text, read
#   back as 16-bit big-endian unsigned integers
# @raise [EncodingError] if the bytes are not valid UTF-8 or cannot be
#   represented in the target encoding
def self.utf82cp_19(utf8_string)
  # Naming the source encoding keeps the result independent of the
  # string's current encoding tag.
  ucs2_bytes = utf8_string.encode('ucs-2be', 'utf-8')
  ucs2_bytes.unpack('n*')
end
|