Class: Dictionaries::SpellChecker

Inherits:
Base
  • Object
show all
Defined in:
lib/dictionaries/spell_checker/spell_checker.rb

Overview

Dictionaries::SpellChecker

Constant Summary collapse

NAMESPACE =
#

NAMESPACE

#
inspect
FILE_IGNORE_THESE_WORDS =
#

FILE_IGNORE_THESE_WORDS

#
'IGNORE_THESE_WORDS.md'

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#commandline_arguments?, #first_argument?, #opne, #rev, #set_commandline_arguments, #sfancy

Constructor Details

#initialize(commandline_arguments = nil, run_already = true) ⇒ SpellChecker

#

initialize

#


34
35
36
37
38
39
40
41
42
43
# File 'lib/dictionaries/spell_checker/spell_checker.rb', line 34

# ========================================================================= #
# === initialize
#
# Resets the internal state, stores the given commandline arguments and,
# unless run_already is false or nil, immediately invokes run.
#
# commandline_arguments: handed to set_commandline_arguments as-is.
# run_already:           when truthy, run is called at the end.
# ========================================================================= #
def initialize(
    commandline_arguments = nil,
    run_already           = true
  )
  reset
  set_commandline_arguments(commandline_arguments)
  return unless run_already
  run
end

Class Method Details

.[](i = ARGV) ⇒ Object

#

Dictionaries::SpellChecker[]

#


125
126
127
# File 'lib/dictionaries/spell_checker/spell_checker.rb', line 125

# ========================================================================= #
# === Dictionaries::SpellChecker[]
#
# Shortcut for .new: instantiates the class with the given argument,
# which defaults to ARGV.
# ========================================================================= #
def self.[](i = ARGV)
  self.new(i)
end

Instance Method Details

#do_compare_each_discovered_word_towards_the_english_dictionary(array_all_discovered_words) ⇒ Object

#

do_compare_each_discovered_word_towards_the_english_dictionary

#


91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/dictionaries/spell_checker/spell_checker.rb', line 91

# ========================================================================= #
# === do_compare_each_discovered_word_towards_the_english_dictionary
#
# Compares every discovered word against the keys of the english
# dictionary (the YAML file at Dictionaries.file_english?) and reports,
# via e(), each word that has no entry there.
#
# Words are normalized first: converted to a String, downcased, with
# empty entries dropped and duplicates removed, then sorted. This way
# "Word" and "word" are reported at most once.
#
# If the file FILE_IGNORE_THESE_WORDS exists in the current working
# directory, its entries are merged into @ignore_these_words. Words on
# that list (compared case-insensitively) are not reported as missing.
#
# array_all_discovered_words: an Array of word-like entries.
#
# Returns nil when no word is missing; otherwise whatever the final e()
# call returns.
# ========================================================================= #
def do_compare_each_discovered_word_towards_the_english_dictionary(
    array_all_discovered_words
  )
  if File.exist? FILE_IGNORE_THESE_WORDS
    @ignore_these_words << YAML.load_file(FILE_IGNORE_THESE_WORDS)
    @ignore_these_words.flatten!
    @ignore_these_words.uniq!
    @ignore_these_words.compact!
  end
  # The words are compared in downcased form, so the ignore-list has to
  # be downcased as well.
  ignored_words = @ignore_these_words.map { |entry| entry.to_s.downcase }
  # An empty YAML file does not yield a Hash; fall back to an empty one
  # rather than failing on has_key? further below.
  hash_english_dictionary = YAML.load_file(Dictionaries.file_english?) || {}
  e rev+'Now comparing each discovered word towards the english dictionary.'
  # Downcase BEFORE calling .uniq, so that case-variants collapse into a
  # single entry. Always keep these entries sorted.
  normalized_words = array_all_discovered_words.
    map { |this_word| this_word.to_s.downcase }.
    reject(&:empty?).
    uniq.
    sort
  array_these_words_are_not_part_of_the_yaml_file =
    normalized_words.reject { |this_word|
      hash_english_dictionary.has_key?(this_word) ||
        ignored_words.include?(this_word)
    }
  array_these_words_are_not_part_of_the_yaml_file.each { |this_word|
    e "#{rev}No matching entry found for the word `#{sfancy(this_word)}`."
  }
  unless array_these_words_are_not_part_of_the_yaml_file.empty?
    e array_these_words_are_not_part_of_the_yaml_file.size.to_s+
      ' words are not registered in the .yml file.'
  end
end

#report_how_many_words_were_found(array) ⇒ Object

#

report_how_many_words_were_found

#


67
68
69
# File 'lib/dictionaries/spell_checker/spell_checker.rb', line 67

# ========================================================================= #
# === report_how_many_words_were_found
#
# Outputs, via e(), how many entries the given array contains.
# ========================================================================= #
def report_how_many_words_were_found(array)
  e(rev + "Found #{array.size} words.")
end

#reset ⇒ Object

#

reset (reset tag)

#


48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/dictionaries/spell_checker/spell_checker.rb', line 48

# ========================================================================= #
# === reset                                                     (reset tag)
#
# Calls the parent reset first and then initializes the instance
# variables of this class:
#
#   @namespace          - set to the NAMESPACE constant.
#   @ignore_these_words - an initially empty Array. It specifies which
#                         words or word-like entries this class has to
#                         ignore. This is typically supplied by the user,
#                         such as by reading from the file
#                         IGNORE_THESE_WORDS.md.
# ========================================================================= #
def reset
  super()
  @namespace          = NAMESPACE
  @ignore_these_words = []
end

#run ⇒ Object

#

run (run tag)

#


74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/dictionaries/spell_checker/spell_checker.rb', line 74

# ========================================================================= #
# === run                                                         (run tag)
#
# If the first commandline argument is the path of an existing file,
# this method reads that file, extracts all words from it, reports how
# many words were found and then compares them against the english
# dictionary.
#
# A "word" is a maximal run of [[:word:]] characters (letters, digits
# and the underscore). The words are collected via .scan rather than by
# splitting on the separators, because .split yields a leading empty
# String whenever the file begins with a non-word character, which
# would then be counted and checked as if it were a word.
#
# Nothing happens (and nil is returned) when no argument was given or
# when the argument is not a file.
# ========================================================================= #
def run
  this_file = first_argument?
  return unless this_file && File.file?(this_file)
  # ===================================================================== #
  # Read dataset from a locally existing file:
  # ===================================================================== #
  all_words = File.read(this_file).scan(/[[:word:]]+/)
  report_how_many_words_were_found(all_words)
  do_compare_each_discovered_word_towards_the_english_dictionary(all_words)
end