Module: Langusta

Defined in:
lib/langusta.rb,
lib/langusta/guard.rb,
lib/langusta/n_gram.rb,
lib/langusta/command.rb,
lib/langusta/version.rb,
lib/langusta/detector.rb,
lib/langusta/language.rb,
lib/langusta/inspector.rb,
lib/langusta/codepoints.rb,
lib/langusta/lang_profile.rb,
lib/langusta/regex_helper.rb,
lib/langusta/tag_extractor.rb,
lib/langusta/unicode_block.rb,
lib/langusta/detector_factory.rb,
lib/langusta/java_property_reader.rb,
lib/langusta/language_detection_facade.rb

Defined Under Namespace

Modules: Codepoints, Guard, Inspector, RegexHelper, UnicodeBlock

Classes: Command, Detector, DetectorFactory, DuplicateProfilesError, Error, JavaPropertyReader, LangProfile, Language, LanguageDetectionFacade, NGram, NoFeaturesInTextError, NoProfilesLoadedError, TagExtractor

Constant Summary

# Absolute path of the parent of the directory that contains the defining file.
ABSOLUTE_PATH =
File.expand_path(File.join(File.dirname(__FILE__), '..'))
# Path of the 'profiles' entry directly under ABSOLUTE_PATH.
PROFILES_PATH =
File.join(ABSOLUTE_PATH, 'profiles')
# Path of 'data/uppercase.bin' under ABSOLUTE_PATH.
UPPERCASE_BIN =
File.join(ABSOLUTE_PATH, 'data/uppercase.bin')
# Path of 'data/messages.properties' under ABSOLUTE_PATH.
MESSAGES_PROPERTIES =
File.join(ABSOLUTE_PATH, 'data/messages.properties')
# Name of the method that utf82cp dispatches to: the *_18 variant when
# RUBY_VERSION compares (as a string) below "1.9", the *_19 variant otherwise.
UTF82CP_SELECTOR =
RUBY_VERSION < "1.9" ? :utf82cp_18 : :utf82cp_19
# Name of the method that cp2utf8 dispatches to, chosen by the same
# RUBY_VERSION string comparison.
CP2UTF8_SELECTOR =
RUBY_VERSION < "1.9" ? :cp2utf8_18 : :cp2utf8_19
# Version string of the library.
VERSION =
"0.2.4"

Class Method Summary

Class Method Details

.cp2utf8(cp_array) ⇒ Object



57
58
59
# File 'lib/langusta.rb', line 57

# Turns an array of 16-bit code units into a UTF-8 string by forwarding to
# the implementation whose name is stored in CP2UTF8_SELECTOR.
#
# @param cp_array [Array<Integer>] handed unchanged to the selected implementation
# @return [Object] whatever the selected implementation returns
def self.cp2utf8(cp_array)
  implementation = CP2UTF8_SELECTOR
  __send__(implementation, cp_array)
end

.cp2utf8_18(cp_array) ⇒ Object



61
62
63
# File 'lib/langusta.rb', line 61

# Iconv-based implementation of cp2utf8 (the variant selected when
# RUBY_VERSION is below "1.9").
#
# @param cp_array [Array<Integer>] values packed as 16-bit big-endian unsigned integers
# @return [Object] result of Iconv.conv from 'ucs-2be' to 'utf-8'
def self.cp2utf8_18(cp_array)
  # 'n*' writes every element as a 16-bit unsigned integer in network byte order.
  packed_units = cp_array.pack('n*')
  Iconv.conv('utf-8', 'ucs-2be', packed_units)
end

.cp2utf8_19(cp_array) ⇒ Object



65
66
67
# File 'lib/langusta.rb', line 65

# String#encode-based implementation of cp2utf8 (the variant selected when
# RUBY_VERSION is "1.9" or above).
#
# @param cp_array [Array<Integer>] values packed as 16-bit big-endian unsigned integers
# @return [String] the packed bytes, tagged as 'ucs-2be' and transcoded to 'utf-8'
def self.cp2utf8_19(cp_array)
  # 'n*' writes every element as a 16-bit unsigned integer in network byte order.
  big_endian_bytes = cp_array.pack('n*')
  # Only relabels the bytes; no transcoding happens until #encode below.
  big_endian_bytes.force_encoding('ucs-2be')
  big_endian_bytes.encode('utf-8')
end

.utf82cp(utf8_string) ⇒ Object



45
46
47
# File 'lib/langusta.rb', line 45

# Turns a UTF-8 string into an array of 16-bit code units by forwarding to
# the implementation whose name is stored in UTF82CP_SELECTOR.
#
# @param utf8_string [String] handed unchanged to the selected implementation
# @return [Object] whatever the selected implementation returns
def self.utf82cp(utf8_string)
  implementation = UTF82CP_SELECTOR
  __send__(implementation, utf8_string)
end

.utf82cp_18(utf8_string) ⇒ Object



49
50
51
# File 'lib/langusta.rb', line 49

# Iconv-based implementation of utf82cp (the variant selected when
# RUBY_VERSION is below "1.9").
#
# @param utf8_string [String] text converted by Iconv from 'utf-8' to 'ucs-2be'
# @return [Array<Integer>] the converted bytes read as 16-bit big-endian unsigned integers
def self.utf82cp_18(utf8_string)
  ucs2_bytes = Iconv.conv('ucs-2be', 'utf-8', utf8_string)
  # 'n*' reads every byte pair as a 16-bit unsigned integer in network byte order.
  ucs2_bytes.unpack('n*')
end

.utf82cp_19(utf8_string) ⇒ Object



53
54
55
# File 'lib/langusta.rb', line 53

# String#encode-based implementation of utf82cp (the variant selected when
# RUBY_VERSION is "1.9" or above).
#
# @param utf8_string [String] text transcoded to 'ucs-2be'
# @return [Array<Integer>] the transcoded bytes read as 16-bit big-endian unsigned integers
def self.utf82cp_19(utf8_string)
  big_endian_bytes = utf8_string.encode('ucs-2be')
  # 'n*' reads every byte pair as a 16-bit unsigned integer in network byte order.
  big_endian_bytes.unpack('n*')
end