Module: Spellchecker::DetectNgram

Defined in:
lib/spellchecker/detect_ngram.rb

Constant Summary

NGRAM_RANGE =
(1..4).freeze
SEPARATOR_REGEXP =
/[;,\n.]/.freeze

Class Method Summary

Class Method Details

.call(token) ⇒ Spellchecker::Mistake?

Parameters:

Returns:

  • (Spellchecker::Mistake, nil)



12
13
14
15
16
17
18
19
20
21
# File 'lib/spellchecker/detect_ngram.rb', line 12

# Reports a grammar mistake for the n-gram that begins at +token+, if any.
#
# The lookup itself is delegated to +find_ngram+; when it yields no
# correction, nothing is reported. When the matched source text begins with
# an ASCII capital letter, the first non-whitespace character of the
# correction is upcased so the suggestion keeps the original capitalisation.
#
# @param token [#position] the token the n-gram search starts from
# @return [Spellchecker::Mistake, nil] the detected mistake, or nil when
#   +find_ngram+ produced no correction
def call(token)
  phrase, replacement = find_ngram(token)
  return nil unless replacement

  starts_capitalized = phrase.match?(/\A[A-Z]/)
  replacement = replacement.sub(/\S/) { |char| char.upcase } if starts_capitalized

  Mistake.new(
    text: phrase,
    correction: replacement,
    position: token.position,
    type: MistakeTypes::GRAMMAR
  )
end

.fetch_original_text(token, index) ⇒ String

Parameters:

Returns:

  • (String)


42
43
44
45
46
47
48
49
# File 'lib/spellchecker/detect_ngram.rb', line 42

# Rebuilds the source text of an n-gram by walking backwards from its last
# token.
#
# Starting at +token+, collects the +text+ of +index + 1+ tokens (the given
# one and its predecessors reached through +prev+) and joins them with a
# single space, in reading order.
#
# @param token [#text, #prev] the last token of the n-gram
# @param index [Integer] how many preceding tokens to include in addition
#   to +token+
# @return [String] the collected token texts separated by spaces
def fetch_original_text(token, index)
  words = []
  cursor = token

  (index + 1).times do
    previous = cursor.prev
    # Walking backwards, so each text goes to the front to keep reading order.
    words.unshift(cursor.text)
    cursor = previous
  end

  words.join(' ')
end

.find_ngram(token) ⇒ Array<(String, String)>?

Parameters:

Returns:

  • (Array<(String, String)>, nil)


25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/spellchecker/detect_ngram.rb', line 25

# Looks for a known n-gram that starts at +token+.
#
# Beginning with the normalized form of +token+, appends the normalized form
# of each following token (one per step of +NGRAM_RANGE+) and asks
# +Dictionaries::NgramList+ for a match after every addition. The search
# gives up as soon as a following token's text matches +SEPARATOR_REGEXP+,
# or once +NGRAM_RANGE+ is exhausted.
#
# @param token [#normalized, #next] the first token of the candidate n-gram
# @return [Array<(String, String)>, nil] the original text of the matched
#   n-gram and its correction, or nil when nothing matched
def find_ngram(token)
  words = [token.normalized]
  current = token

  NGRAM_RANGE.each do |offset|
    current = current.next
    return nil if current.text.match?(SEPARATOR_REGEXP)

    words << current.normalized
    replacement = Dictionaries::NgramList.match(words)
    return [fetch_original_text(current, offset), replacement] if replacement
  end

  nil
end