Class: UnihanLang::Unihan

Inherits:
Object
  • Object
show all
Defined in:
lib/unihan_lang.rb

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Unihan

Returns a new instance of Unihan.



10
11
12
13
# File 'lib/unihan_lang.rb', line 10

# Creates the two collaborators used by the instance methods and keeps them
# in instance variables.
def initialize
  # Consulted by the per-character predicates (chinese_character?,
  # only_zh_cn?, only_zh_tw?) and handed to ChineseScoreAnalyzer.
  @chinese_processor = ChineseProcessor.new
  # Handed to ChineseScoreAnalyzer by the *_with_variants methods.
  @variant_mapping = VariantMapping.new
end

Instance Method Details

#analyze_with_variants(text) ⇒ Object



55
56
57
58
59
60
61
62
# File 'lib/unihan_lang.rb', line 55

# Runs a ChineseScoreAnalyzer over +text+ (using this instance's processor
# and variant mapping) and reports three of its readings.
#
# @param text [Object] forwarded unchanged to ChineseScoreAnalyzer.new
#   (presumably a String — confirm against ChineseScoreAnalyzer)
# @return [Hash{Symbol => Object}] the analyzer's results under the keys
#   :traditional_score, :simplified_score and :total_chinese, in that order
def analyze_with_variants(text)
  scorer = ChineseScoreAnalyzer.new(text, @chinese_processor, @variant_mapping)

  # Readings are taken in the same order the keys appear in the result.
  traditional = scorer.traditional_score
  simplified = scorer.simplified_score
  chinese_total = scorer.total_chinese

  { traditional_score: traditional, simplified_score: simplified, total_chinese: chinese_total }
end

#contains_chinese?(text) ⇒ Boolean

Returns:

  • (Boolean)


39
40
41
# File 'lib/unihan_lang.rb', line 39

# Reports whether at least one character of +text+ passes the processor's
# chinese_character? check. Scanning stops at the first match.
#
# @param text [#each_char] the text to scan (typically a String)
# @return [Boolean] true if any character matches; false otherwise,
#   including for an empty +text+
def contains_chinese?(text)
  text.each_char.any? do |character|
    @chinese_processor.chinese_character?(character)
  end
end

#contains_zh_cn?(text) ⇒ Boolean

Returns:

  • (Boolean)


35
36
37
# File 'lib/unihan_lang.rb', line 35

# Reports whether at least one character of +text+ passes the processor's
# only_zh_cn? check (presumably "Simplified-only" characters — confirm in
# ChineseProcessor). Scanning stops at the first match.
#
# @param text [#each_char] the text to scan (typically a String)
# @return [Boolean] true if any character matches; false otherwise,
#   including for an empty +text+
def contains_zh_cn?(text)
  text.each_char.any? do |character|
    @chinese_processor.only_zh_cn?(character)
  end
end

#contains_zh_tw?(text) ⇒ Boolean

Returns:

  • (Boolean)


31
32
33
# File 'lib/unihan_lang.rb', line 31

# Reports whether at least one character of +text+ passes the processor's
# only_zh_tw? check (presumably "Traditional-only" characters — confirm in
# ChineseProcessor). Scanning stops at the first match.
#
# @param text [#each_char] the text to scan (typically a String)
# @return [Boolean] true if any character matches; false otherwise,
#   including for an empty +text+
def contains_zh_tw?(text)
  text.each_char.any? do |character|
    @chinese_processor.only_zh_tw?(character)
  end
end

#determine_language(text) ⇒ Object



47
48
49
50
51
52
53
# File 'lib/unihan_lang.rb', line 47

# Translates the result of language_ratio(text) into a label string.
# language_ratio is defined outside this excerpt; only its :tw and :cn
# results are given dedicated labels here.
#
# @param text [Object] forwarded unchanged to language_ratio
# @return [String] "ZH_TW" for :tw, "ZH_CN" for :cn, "Unknown" for anything else
def determine_language(text)
  # Built per call so every invocation hands back its own String objects.
  labels = { tw: "ZH_TW", cn: "ZH_CN" }
  labels.fetch(language_ratio(text), "Unknown")
end

#determine_language_with_variants(text) ⇒ Object



64
65
66
67
# File 'lib/unihan_lang.rb', line 64

# Builds a ChineseScoreAnalyzer for +text+ (using this instance's processor
# and variant mapping) and returns its dominant_language result.
#
# @param text [Object] forwarded unchanged to ChineseScoreAnalyzer.new
#   (presumably a String — confirm against ChineseScoreAnalyzer)
# @return [Object] whatever ChineseScoreAnalyzer#dominant_language returns
def determine_language_with_variants(text)
  ChineseScoreAnalyzer
    .new(text, @chinese_processor, @variant_mapping)
    .dominant_language
end

#extract_chinese_characters(text) ⇒ Object



43
44
45
# File 'lib/unihan_lang.rb', line 43

# Collects, in their original order, the characters of +text+ that pass the
# processor's chinese_character? check. Repeated characters are kept.
#
# @param text [#each_char] the text to scan (typically a String)
# @return [Array<String>] the matching characters; empty when none match
def extract_chinese_characters(text)
  text.each_char.select do |character|
    @chinese_processor.chinese_character?(character)
  end
end

#only_zh_cn?(text) ⇒ Boolean

Returns:

  • (Boolean)


27
28
29
# File 'lib/unihan_lang.rb', line 27

# Reports whether every character of +text+ passes the processor's
# only_zh_cn? check. Scanning stops at the first character that fails.
#
# Note: an empty +text+ yields true, because there is no character that
# could fail the check.
#
# @param text [#each_char] the text to scan (typically a String)
# @return [Boolean] true if no character fails the check, false otherwise
def only_zh_cn?(text)
  text.each_char.all? do |character|
    @chinese_processor.only_zh_cn?(character)
  end
end

#only_zh_tw?(text) ⇒ Boolean

Returns:

  • (Boolean)


23
24
25
# File 'lib/unihan_lang.rb', line 23

# Reports whether every character of +text+ passes the processor's
# only_zh_tw? check. Scanning stops at the first character that fails.
#
# Note: an empty +text+ yields true, because there is no character that
# could fail the check.
#
# @param text [#each_char] the text to scan (typically a String)
# @return [Boolean] true if no character fails the check, false otherwise
def only_zh_tw?(text)
  text.each_char.all? do |character|
    @chinese_processor.only_zh_tw?(character)
  end
end

#zh_cn?(text) ⇒ Boolean

Returns:

  • (Boolean)


19
20
21
# File 'lib/unihan_lang.rb', line 19

# Reports whether language_ratio(text) comes back as :cn.
# language_ratio is defined outside this excerpt.
#
# @param text [Object] forwarded unchanged to language_ratio
# @return [Boolean] result of comparing language_ratio's value with :cn
def zh_cn?(text)
  ratio_result = language_ratio(text)
  ratio_result == :cn
end

#zh_tw?(text) ⇒ Boolean

Returns:

  • (Boolean)


15
16
17
# File 'lib/unihan_lang.rb', line 15

# Reports whether language_ratio(text) comes back as :tw.
# language_ratio is defined outside this excerpt.
#
# @param text [Object] forwarded unchanged to language_ratio
# @return [Boolean] result of comparing language_ratio's value with :tw
def zh_tw?(text)
  ratio_result = language_ratio(text)
  ratio_result == :tw
end