Class: UnihanLang::Unihan
- Inherits: Object
- Inheritance chain: Object → UnihanLang::Unihan
- Defined in:
- lib/unihan_lang.rb
Instance Method Summary
- #analyze_with_variants(text) ⇒ Object
- #contains_chinese?(text) ⇒ Boolean
- #contains_zh_cn?(text) ⇒ Boolean
- #contains_zh_tw?(text) ⇒ Boolean
- #determine_language(text) ⇒ Object
- #determine_language_with_variants(text) ⇒ Object
- #extract_chinese_characters(text) ⇒ Object
- #initialize ⇒ Unihan (constructor): returns a new instance of Unihan.
- #only_zh_cn?(text) ⇒ Boolean
- #only_zh_tw?(text) ⇒ Boolean
- #zh_cn?(text) ⇒ Boolean
- #zh_tw?(text) ⇒ Boolean
Constructor Details
#initialize ⇒ Unihan
Returns a new instance of Unihan.
10 11 12 13 |
# File 'lib/unihan_lang.rb', line 10
#
# Sets up the two collaborators held by this object: a ChineseProcessor,
# stored in @chinese_processor, and a VariantMapping, stored in
# @variant_mapping. Takes no arguments.
def initialize
  @chinese_processor = ChineseProcessor.new
  @variant_mapping = VariantMapping.new
end
Instance Method Details
#analyze_with_variants(text) ⇒ Object
55 56 57 58 59 60 61 62 |
# File 'lib/unihan_lang.rb', line 55
#
# Runs a ChineseScoreAnalyzer over +text+ (handing it @chinese_processor and
# @variant_mapping) and reports three of its readings.
#
# @param text the text handed to the analyzer
# @return [Hash] with the keys :traditional_score, :simplified_score and
#   :total_chinese, each holding the analyzer reading of the same name
def analyze_with_variants(text)
  scorer = ChineseScoreAnalyzer.new(text, @chinese_processor, @variant_mapping)

  report = {
    traditional_score: scorer.traditional_score,
    simplified_score: scorer.simplified_score,
    total_chinese: scorer.total_chinese
  }
  report
end
#contains_chinese?(text) ⇒ Boolean
39 40 41 |
# File 'lib/unihan_lang.rb', line 39
#
# Tells whether at least one character of +text+ is accepted by
# @chinese_processor.chinese_character?.
#
# @param text [String] the text to scan
# @return [Boolean] true as soon as one character matches; false when none
#   does (including for an empty string)
def contains_chinese?(text)
  # each_char enumerates lazily, so any? can stop at the first hit without
  # first materialising an array of every character the way String#chars does.
  text.each_char.any? { |char| @chinese_processor.chinese_character?(char) }
end
#contains_zh_cn?(text) ⇒ Boolean
35 36 37 |
# File 'lib/unihan_lang.rb', line 35
#
# Tells whether at least one character of +text+ is accepted by
# @chinese_processor.only_zh_cn?.
#
# @param text [String] the text to scan
# @return [Boolean] true as soon as one character matches; false when none
#   does (including for an empty string)
def contains_zh_cn?(text)
  # each_char enumerates lazily, so any? can stop at the first hit without
  # first materialising an array of every character the way String#chars does.
  text.each_char.any? { |char| @chinese_processor.only_zh_cn?(char) }
end
#contains_zh_tw?(text) ⇒ Boolean
31 32 33 |
# File 'lib/unihan_lang.rb', line 31
#
# Tells whether at least one character of +text+ is accepted by
# @chinese_processor.only_zh_tw?.
#
# @param text [String] the text to scan
# @return [Boolean] true as soon as one character matches; false when none
#   does (including for an empty string)
def contains_zh_tw?(text)
  # each_char enumerates lazily, so any? can stop at the first hit without
  # first materialising an array of every character the way String#chars does.
  text.each_char.any? { |char| @chinese_processor.only_zh_tw?(char) }
end
#determine_language(text) ⇒ Object
47 48 49 50 51 52 53 |
# File 'lib/unihan_lang.rb', line 47
#
# Translates the result of language_ratio(text) into a label string.
#
# @param text the text passed on to language_ratio
# @return [String] "ZH_TW" when language_ratio yields :tw, "ZH_CN" when it
#   yields :cn, and "Unknown" for any other result
def determine_language(text)
  ratio = language_ratio(text)

  case ratio
  when :tw
    "ZH_TW"
  when :cn
    "ZH_CN"
  else
    "Unknown"
  end
end
#determine_language_with_variants(text) ⇒ Object
64 65 66 67 |
# File 'lib/unihan_lang.rb', line 64
#
# Builds a ChineseScoreAnalyzer for +text+ (handing it @chinese_processor and
# @variant_mapping) and returns whatever its dominant_language reports.
#
# @param text the text handed to the analyzer
# @return the analyzer's dominant_language value
def determine_language_with_variants(text)
  ChineseScoreAnalyzer
    .new(text, @chinese_processor, @variant_mapping)
    .dominant_language
end
#extract_chinese_characters(text) ⇒ Object
43 44 45 |
# File 'lib/unihan_lang.rb', line 43
#
# Collects, in their original order, the characters of +text+ that are
# accepted by @chinese_processor.chinese_character?.
#
# @param text [String] the text to filter
# @return [Array<String>] the matching characters; empty when none match
def extract_chinese_characters(text)
  # Selecting straight from the each_char enumerator builds only the result
  # array, instead of a full character array followed by a filtered copy.
  text.each_char.select { |char| @chinese_processor.chinese_character?(char) }
end
#only_zh_cn?(text) ⇒ Boolean
27 28 29 |
# File 'lib/unihan_lang.rb', line 27
#
# Tells whether +text+ is non-empty and every one of its characters is
# accepted by @chinese_processor.only_zh_cn?.
#
# @param text [String] the text to check
# @return [Boolean] true when the text has at least one character and all
#   characters match; false otherwise
def only_zh_cn?(text)
  # all? is vacuously true for an empty collection; an empty string holds no
  # matching characters at all, so answer false rather than true.
  return false if text.empty?

  # each_char lets all? stop at the first mismatch without building an array
  # of every character first.
  text.each_char.all? { |char| @chinese_processor.only_zh_cn?(char) }
end
#only_zh_tw?(text) ⇒ Boolean
23 24 25 |
# File 'lib/unihan_lang.rb', line 23
#
# Tells whether +text+ is non-empty and every one of its characters is
# accepted by @chinese_processor.only_zh_tw?.
#
# @param text [String] the text to check
# @return [Boolean] true when the text has at least one character and all
#   characters match; false otherwise
def only_zh_tw?(text)
  # all? is vacuously true for an empty collection; an empty string holds no
  # matching characters at all, so answer false rather than true.
  return false if text.empty?

  # each_char lets all? stop at the first mismatch without building an array
  # of every character first.
  text.each_char.all? { |char| @chinese_processor.only_zh_tw?(char) }
end
#zh_cn?(text) ⇒ Boolean
19 20 21 |
# File 'lib/unihan_lang.rb', line 19
#
# Tells whether language_ratio(text) comes back as :cn.
#
# @param text the text passed on to language_ratio
# @return [Boolean] result of comparing the ratio verdict with :cn
def zh_cn?(text)
  verdict = language_ratio(text)
  verdict == :cn
end
#zh_tw?(text) ⇒ Boolean
15 16 17 |
# File 'lib/unihan_lang.rb', line 15
#
# Tells whether language_ratio(text) comes back as :tw.
#
# @param text the text passed on to language_ratio
# @return [Boolean] result of comparing the ratio verdict with :tw
def zh_tw?(text)
  verdict = language_ratio(text)
  verdict == :tw
end