Module: Igo::Ja

Defined in:
lib/igo/ja.rb

Overview

## 使い方

Usage / Ja

require 'igo'
require 'igo/ja'

j = Igo::Ja

cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
#=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]

cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
#=>  "あー 、 合成 は 結合法則 を 満たす ん でした ね"

Constant Summary collapse

SEARCH_URL =
"https://jisho.org/search/"

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.cut(str, s: false) ⇒ Object

## 使い方

Usage / Ja

j = Igo::Ja

cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
#=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]

cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
#=>  "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"


39
40
41
42
43
44
45
46
47
48
# File 'lib/igo/ja.rb', line 39

# Word-segments Japanese text by delegating to `tag` with both the reading
# and the part-of-speech output switched off.
#
# @param str the text handed to `tag` unchanged
# @param s [false, true, String] forwarded to `tag` as its `s:` option
# @return whatever `tag` returns for these options
def cut(str, s: false)
  options = { s: s, kana: false, tag: false }
  tag(str, **options)
end

.kana(str, s: false, lr: "()") ⇒ Object

def kana str end



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/igo/ja.rb', line 55

# Requests the readings of a Japanese text by delegating to `tag` with
# `kana: true`, `kana_only: true` and the part-of-speech output switched off.
#
# @param str the text handed to `tag` unchanged
# @param s [false, true, String] forwarded to `tag` as its `s:` option
# @param lr [String, Array] forwarded to `tag` as its `lr:` option
# @return whatever `tag` returns for these options
def kana(str, s: false, lr: "()")
  options = { s: s, lr: lr, tag: false, kana: true, kana_only: true }
  tag(str, **options)
end

.romaji(str, s: false) ⇒ Object

TODO: tag word function



101
102
103
# File 'lib/igo/ja.rb', line 101

# Requests a romanised rendering of a Japanese text by delegating to `tag`
# with `romaji: true`, `kana_only: true` and the part-of-speech output off.
#
# @param str the text handed to `tag` unchanged
# @param s [false, true, String] forwarded to `tag` as its `s:` option
# @return whatever `tag` returns for these options
def romaji(str, s: false)
  tag(str, s: s, tag: false, kana_only: true, romaji: true)
end

.ruby(str, s: false, lr: "()", romaji: false) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/igo/ja.rb', line 75

# Requests each word together with its reading by delegating to `tag` with
# `kana: true` and the part-of-speech output switched off.
#
# @param str the text handed to `tag` unchanged
# @param s [false, true, String] forwarded to `tag` as its `s:` option
# @param lr [String, Array] forwarded to `tag` as its `lr:` option
# @param romaji [Boolean] forwarded to `tag` as its `romaji:` option
# @return whatever `tag` returns for these options
def ruby(str, s: false, lr: "()", romaji: false)
  options = { s: s, lr: lr, romaji: romaji, kana: true, tag: false }
  tag(str, **options)
end

.tag(str, s: false, ns: 0, lr: "()", sp: "_", short: false, tag: true, kana: false, timeout: 10, kana_only: false, romaji: false) ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/igo/ja.rb', line 105

# Segments Japanese text through the SEARCH_URL search page and optionally
# annotates every word with its reading and its part-of-speech label.
#
# @param str [String, Array<String>] one text, or several texts that are
#   looked up concurrently; any other type makes the method return nil
# @param s [false, true, String] falsy returns arrays; a String is used as
#   the word separator; any other truthy value means a single space
# @param ns [Integer] accepted and forwarded, but not read by any helper here
# @param lr [String, Array] opening/closing marks put around a reading
# @param sp [String] separator between a word and its part-of-speech label
# @param short [false, true, Integer] truncate and downcase the label to this
#   many characters (any non-Integer truthy value means 4)
# @param tag [Boolean] include the part-of-speech label
# @param kana [Boolean] include the reading
# @param timeout [Numeric] per-text time limit in seconds for Array input
# @param kana_only [Boolean] when joining, emit the reading in place of the word
# @param romaji [Boolean] use the `_romaji` conversion as the reading
# @return [Array, String, nil] see `s`; Array input yields one result per text
#
# NOTE: the helpers below are nested `def`s, so they are (re)defined every
# time `tag` runs. That structure is kept as-is for compatibility.
def tag str, s: false, ns: 0, lr: "()", sp:"_", short: false, tag: true, kana: false, timeout: 10, kana_only: false, romaji: false

  # Runs the block for every element concurrently and returns the results in
  # input order. An element whose block exceeds `timeout` seconds yields nil.
  def async_query(arr, timeout=0, &block)
    promises = arr.map do |element|
      Concurrent::Promise.execute do
        begin
          Timeout.timeout(timeout) { block.call(element) }
        rescue Timeout::Error
          # Report the timeout and let this element resolve to nil.
          puts "任务执行超时, #{timeout} 秒!关键词参数 timeout: 指定超时秒数!"
          nil
        end
      end
    end

    # Block until every promise has finished; `value!` re-raises failures.
    promises.map(&:value!)
  end

  # Fetches the search page for `str` and returns one
  # `[text, reading, part_of_speech]` triple per `.japanese_word` node.
  def _tag str, romaji: false
    query = URI.encode_www_form_component(str)
    # Block form so the handle opened by open-uri is closed right away.
    html = URI.open(SEARCH_URL + query, &:read)

    Nokogiri::HTML(html).css(".japanese_word").map do |word|
      text = word.css(".japanese_word__text_wrapper").text.strip
      furigana = word.css(".japanese_word__furigana").text
      # Romanise only on request; falls back to the word text when the node
      # carries no furigana.
      reading = romaji ? _romaji(furigana.empty? ? text : furigana) : furigana

      [text, reading, word.attr("data-pos")]
    end
  end

  # Joins `[text, reading, pos]` triples into one string.
  #
  # NOTE: the flag names are the reverse of what they gate: `vis_tag` controls
  # the bracketed reading and `vis_kana` controls the `sp`-prefixed label.
  # `singo_proc` passes them swapped, so the two cancel out.
  def _stringify cutted, s: "/", lr: "()", sp:"_", short: false, vis_tag: true, vis_kana: true, kana_only: false, romaji: false
    # Normalise the separator for BOTH modes; `join(true)` raises TypeError.
    s = " " unless s.is_a?(String)
    left, right = case lr
                  when String then (lr * 2).chars.values_at(0, -1)
                  when Array then lr
                  end

    cutted.map do |text, reading, pos|
      reading = reading.to_s
      suffix = vis_kana && pos ? "#{sp}#{pos}" : ""
      if kana_only
        "#{reading.empty? ? text : reading}#{suffix}"
      elsif vis_tag && !reading.empty?
        "#{text}#{left}#{reading}#{right}#{suffix}"
      else
        "#{text}#{suffix}"
      end
    end.join(s)
  end

  # Looks up a single text and shapes the result as a string or as arrays.
  def singo_proc str, s: false, ns: 0, lr: "()", sp:"_", short: false, vis_tag: true, vis_kana: true, kana_only: false, romaji: false
    words = _tag str, romaji: romaji

    if short
      short = 4 unless short.is_a?(Integer)
      words = words.map do |text, reading, pos|
        label = pos && pos[0, short]
        [text, reading, label&.downcase]
      end
    end

    if s
      # Flags are swapped on purpose to match `_stringify`'s reversed names.
      _stringify words, s: s, lr: lr, sp: sp, short: short, vis_tag: vis_kana, vis_kana: vis_tag, kana_only: kana_only, romaji: romaji
    else
      # NOTE(review): `kana_only` has no effect in this branch - confirm intent.
      columns = [0, vis_kana ? 1 : 0, vis_tag ? 2 : 0].uniq
      rows = words.map { |word| word.values_at(*columns) }
      # Deciding on the column count (not on the first row) keeps an empty
      # lookup result from raising.
      columns.size == 1 ? rows.flatten : rows
    end
  end

  case str
  when String
    singo_proc str, s: s, ns: ns, lr: lr, sp: sp, short: short, vis_tag: tag, vis_kana: kana, kana_only: kana_only, romaji: romaji
  when Array
    async_query str, timeout do |sentence|
      singo_proc sentence, s: s, ns: ns, lr: lr, sp: sp, short: short, vis_tag: tag, vis_kana: kana, kana_only: kana_only, romaji: romaji
    end
  end
  # TODO
end

Instance Method Details

#_stringify(cutted, s: "/", lr: "()", sp: "_", short: false, vis_tag: true, vis_kana: true, kana_only: false, romaji: false) ⇒ Object



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'lib/igo/ja.rb', line 147

# Joins tagged words into a single string.
#
# Every element of `cutted` is read as `[text, reading, pos]`.
#
# NOTE: the flag names are the reverse of what they gate: `vis_tag` controls
# the bracketed reading (index 1) and `vis_kana` controls the `sp`-prefixed
# part-of-speech label (index 2). `singo_proc` passes the two swapped, so the
# mismatch cancels out; the names are kept for compatibility.
#
# @param cutted [Array<Array>] `[text, reading, pos]` triples
# @param s [String, Object] word separator; any non-String means one space
# @param lr [String, Array] a String supplies its first and last character as
#   the opening/closing marks; an Array supplies them as its first two items
# @param sp [String] separator placed before the part-of-speech label
# @param short accepted but not used here
# @param vis_tag [Boolean] append the bracketed reading (ignored if kana_only)
# @param vis_kana [Boolean] append the part-of-speech label when present
# @param kana_only [Boolean] emit the reading in place of the word, falling
#   back to the word when the reading is empty
# @param romaji accepted but not used here
# @return [String]
def _stringify(cutted, s: "/", lr: "()", sp: "_", short: false, vis_tag: true, vis_kana: true, kana_only: false, romaji: false)
  # Normalise the separator for BOTH modes; the kana_only path used to call
  # `join(true)`, which raises TypeError.
  s = " " unless s.is_a?(String)
  left, right = case lr
                when String then (lr * 2).chars.values_at(0, -1)
                when Array then lr
                end

  cutted.map do |text, reading, pos|
    reading = reading.to_s
    suffix = vis_kana && pos ? "#{sp}#{pos}" : ""
    if kana_only
      "#{reading.empty? ? text : reading}#{suffix}"
    elsif vis_tag && !reading.empty?
      "#{text}#{left}#{reading}#{right}#{suffix}"
    else
      "#{text}#{suffix}"
    end
  end.join(s)
end

#_tag(str, romaji: false) ⇒ Object



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/igo/ja.rb', line 129

# Fetches the SEARCH_URL result page for `str` and extracts one triple per
# `.japanese_word` node.
#
# @param str [String] text to look up; it is form-encoded before being
#   appended to SEARCH_URL
# @param romaji [Boolean] when true the second slot holds the `_romaji`
#   conversion of the furigana (or of the word text when there is no
#   furigana); otherwise it holds the furigana text, possibly ""
# @return [Array<Array>] `[text, reading, data-pos attribute or nil]` triples
def _tag(str, romaji: false)
  query = URI.encode_www_form_component(str)
  # Block form so the handle opened by open-uri is closed right away instead
  # of lingering until garbage collection.
  html = URI.open(SEARCH_URL + query, &:read)

  Nokogiri::HTML(html).css(".japanese_word").map do |word|
    text = word.css(".japanese_word__text_wrapper").text.strip
    furigana = word.css(".japanese_word__furigana").text
    # Romanise only on request; the conversion result is unused otherwise.
    reading = romaji ? _romaji(furigana.empty? ? text : furigana) : furigana

    [text, reading, word.attr("data-pos")]
  end
end

#async_query(arr, timeout = 0, &block) ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/igo/ja.rb', line 107

# Runs the given block once per element of `arr`, each inside its own
# Concurrent::Promise, and returns the block results in input order.
#
# @param arr [Array] elements handed to the block one at a time
# @param timeout [Numeric] seconds passed to Timeout.timeout for each element
# @yield [element] the work to perform for one element
# @return [Array] one entry per element; nil where the block timed out
def async_query(arr, timeout=0, &block)
  guarded = lambda do |item|
    Timeout.timeout(timeout) { block.call(item) }
  rescue Timeout::Error
    # Report the timeout and let this element resolve to nil.
    puts "任务执行超时, #{timeout} 秒!关键词参数 timeout: 指定超时秒数!"
    nil
  end

  # Start every promise first, then wait for each; `value!` re-raises failures.
  pending = arr.map { |item| Concurrent::Promise.execute { guarded.call(item) } }
  pending.map(&:value!)
end

#singo_proc(str, s: false, ns: 0, lr: "()", sp: "_", short: false, vis_tag: true, vis_kana: true, kana_only: false, romaji: false) ⇒ Object



171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'lib/igo/ja.rb', line 171

# Looks up a single text with `_tag` and shapes the result.
#
# @param str [String] text to look up
# @param s [false, true, String] truthy: return one string built by
#   `_stringify`; falsy: return arrays
# @param ns accepted but not used here
# @param lr forwarded to `_stringify`
# @param sp forwarded to `_stringify`
# @param short [false, true, Integer] truncate and downcase each
#   part-of-speech label to this many characters (non-Integer truthy means 4)
# @param vis_tag [Boolean] keep the part-of-speech label (index 2)
# @param vis_kana [Boolean] keep the reading (index 1)
# @param kana_only [Boolean] forwarded to `_stringify`
# @param romaji [Boolean] forwarded to `_tag` and `_stringify`
# @return [String, Array] a flat Array of words when only the text column is
#   kept, otherwise one Array per word; [] when the lookup finds nothing
def singo_proc(str, s: false, ns: 0, lr: "()", sp: "_", short: false, vis_tag: true, vis_kana: true, kana_only: false, romaji: false)
  words = _tag(str, romaji: romaji)

  if short
    short = 4 unless short.is_a?(Integer)
    words = words.map do |text, reading, pos|
      # A word without a label keeps nil instead of relying on `rescue nil`.
      label = pos && pos[0, short]
      [text, reading, label&.downcase]
    end
  end

  if s
    # `_stringify` names its two visibility flags the other way round, so
    # they are swapped on purpose here.
    return _stringify(words, s: s, lr: lr, sp: sp, short: short, vis_tag: vis_kana, vis_kana: vis_tag, kana_only: kana_only, romaji: romaji)
  end

  # NOTE(review): `kana_only` has no effect in the array form - confirm intent.
  columns = [0, vis_kana ? 1 : 0, vis_tag ? 2 : 0].uniq
  rows = words.map { |word| word.values_at(*columns) }
  # Deciding on the column count (not on the first row) keeps an empty lookup
  # result from raising NoMethodError on nil.
  columns.size == 1 ? rows.flatten : rows
end