Class: Tomereader::Index
- Inherits:
-
Object
- Object
- Tomereader::Index
- Defined in:
- lib/tomereader/index.rb
Instance Method Summary collapse
- #add(content) ⇒ Object
- #check(word_string) ⇒ Object
- #create(word_string) ⇒ Object
- #find(word_string) ⇒ Object
- #find_or_create(word_string) ⇒ Object
-
#initialize ⇒ Index
constructor
include Settings.
- #phrases ⇒ Object
-
#split(content) ⇒ Object
розбиває текст на фрази, витягує слова, встановлює зв'язки: фраза -> слова та слово -> фрази.
- #split_into_phrases(content) ⇒ Object
-
#suitable?(word_string) ⇒ Boolean
Checks whether the word is suitable for word_storage.
- #to_s ⇒ Object
- #words ⇒ Object
- #words_sorted_by_alphabet(count = nil) ⇒ Object
- #words_sorted_by_frequency(count = nil) ⇒ Object
Constructor Details
#initialize ⇒ Index
include Settings
6 7 8 9 10 11 12 13 14 15 |
# File 'lib/tomereader/index.rb', line 6
# Builds an empty index: no words, no phrases, plus the patterns used to
# cut text into phrases and to recognise words.
#
# Takes no arguments; content is supplied later through #add, which is
# where the String / non-empty validation happens.
def initialize
  # A phrase ends at a period or a semicolon.
  @phrase_split_pattern = /[\.\;]/
  # A word is an ASCII letter followed by any number of letters,
  # apostrophes or hyphens. The pattern is not anchored, so it matches
  # anywhere inside a string.
  @word_pattern = /[A-Za-z]([A-Za-z\'\-])*/
  # Maps a word string to its Word instance.
  @word_storage = {}
  # Every word string encountered so far, duplicates included.
  @total_words = []
  # Every Phrase built so far, in insertion order.
  @phrase_storage = []
end
Instance Method Details
#add(content) ⇒ Object
16 17 18 19 20 21 22 |
# File 'lib/tomereader/index.rb', line 16
# Indexes +content+ by handing it to #split, which records its phrases
# and words.
#
# @param content [String] non-empty text to index
# @return [self] the index itself, so calls can be chained
# @raise [ArgumentError] if +content+ is not a String
# @raise [StandardError] if +content+ is empty
def add(content)
  raise ArgumentError, "Content must be a String - #{content.class} given instead" unless content.kind_of? String
  raise StandardError, "Content is empty" if content.empty?

  # #split performs the phrase splitting itself, so no separate
  # split_into_phrases pass is needed here.
  split(content)
  self
end
#check(word_string) ⇒ Object
72 73 74 |
# File 'lib/tomereader/index.rb', line 72
# Tells whether +word_string+ may be stored as a word: it has to be a
# String and it has to pass #suitable?.
#
# @param word_string [Object] candidate word
# @return [Object] false when +word_string+ is not a String, otherwise
#   whatever #suitable? returns (callers only rely on its truthiness)
def check(word_string)
  return false unless word_string.kind_of?(String)

  suitable?(word_string)
end
#create(word_string) ⇒ Object
67 68 69 70 71 |
# File 'lib/tomereader/index.rb', line 67
# Registers a new Word for +word_string+ when the string passes #check.
# An entry already stored under the same key is replaced.
#
# @param word_string [String] text of the word
# @return [Word, nil] the stored Word, or nil when the string is rejected
def create(word_string)
  return unless check(word_string)

  @word_storage[word_string] = Word.new(word_string)
end
#find(word_string) ⇒ Object
62 63 64 65 66 |
# File 'lib/tomereader/index.rb', line 62
# Looks up the entry stored for +word_string+.
#
# @param word_string [String] text of the word
# @return [Object, nil] the stored entry, or nil when nothing is stored
#   under that key
def find(word_string)
  # One lookup is enough: the storage is built without a default value,
  # so a missing key reads as nil and nothing is inserted.
  @word_storage[word_string]
end
#find_or_create(word_string) ⇒ Object
75 76 77 |
# File 'lib/tomereader/index.rb', line 75
# Returns the entry already stored for +word_string+, creating one via
# #create when #find comes back empty.
#
# @param word_string [String] text of the word
# @return [Object, nil] the found or newly created entry; nil when
#   nothing is stored and #create rejects the string
def find_or_create(word_string)
  existing = find(word_string)
  return existing if existing

  create(word_string)
end
#phrases ⇒ Object
29 30 31 |
# File 'lib/tomereader/index.rb', line 29
# Reader for the phrase storage.
#
# @return [Object] the very object held in @phrase_storage (not a copy),
#   so changes made by the caller are visible to the index
def phrases
  @phrase_storage
end
#split(content) ⇒ Object
розбиває текст на фрази, витягує слова, встановлює зв'язки: фраза -> слова та слово -> фрази
47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/tomereader/index.rb', line 47
# Cuts +content+ into phrases, pulls the words out of every phrase and
# links them both ways: each phrase is stored, and each accepted word is
# told which phrase and position it occurred at.
#
# @param content [String] text to index
# @return [Array] one element per phrase; each element is the result of
#   appending to @phrase_storage
#   NOTE(review): that makes every element the same storage object, which
#   looks accidental - #add ignores the value; confirm no caller uses it.
def split(content)
  split_into_phrases(content).map do |phrase_string|
    phrase = Phrase.new(phrase_string)

    phrase.split do |word_string, position|
      # Every yielded string is counted, including ones rejected below.
      @total_words << word_string

      word = find_or_create(word_string)
      # find_or_create yields nil for strings that fail #check.
      next unless word.is_a?(Word)

      word.add(phrase, position)
    end

    @phrase_storage << phrase
  end
end
#split_into_phrases(content) ⇒ Object
23 24 25 |
# File 'lib/tomereader/index.rb', line 23
# Cuts +content+ into phrase strings at every match of the phrase split
# pattern. Pieces are returned as String#split produces them: surrounding
# whitespace is kept and empty pieces between adjacent separators remain.
#
# @param content [String] text to cut
# @return [Array<String>] the phrase strings, in order
def split_into_phrases(content)
  separator = @phrase_split_pattern
  content.split(separator)
end
#suitable?(word_string) ⇒ Boolean
Checks whether the word is suitable for word_storage
59 60 61 |
# File 'lib/tomereader/index.rb', line 59
# Tells whether +word_string+ contains a match for the word pattern.
# The pattern is applied as-is, so a match anywhere in the string counts.
#
# @param word_string [String, Symbol, Object] candidate word
# @return [Boolean] true when the pattern matches; false when it does not
#   or when +word_string+ is neither a String nor a Symbol
def suitable?(word_string)
  case word_string
  when String, Symbol
    # =~ gives a match offset or nil; turn that into a real Boolean.
    !(word_string =~ @word_pattern).nil?
  else
    # Other objects cannot be matched (Object#=~ is gone in Ruby 3.2+).
    false
  end
end
#to_s ⇒ Object
42 43 44 |
# File 'lib/tomereader/index.rb', line 42
# Summarises the index as counters.
#
# NOTE(review): despite the name this returns a Hash, not a String, so
# string interpolation and puts will not use it - confirm whether callers
# rely on the Hash before changing it.
#
# @return [Hash{Symbol => Integer}] :total (all word strings seen),
#   :unique_count (entries in the word storage) and :phrases (stored
#   phrases)
def to_s
  {
    total: @total_words.count,
    unique_count: @word_storage.count,
    phrases: @phrase_storage.count
  }
end
#words ⇒ Object
26 27 28 |
# File 'lib/tomereader/index.rb', line 26
# Reader for the word storage.
#
# @return [Object] the very object held in @word_storage (not a copy),
#   so changes made by the caller are visible to the index
def words
  @word_storage
end
#words_sorted_by_alphabet(count = nil) ⇒ Object
32 33 34 35 36 |
# File 'lib/tomereader/index.rb', line 32
# Returns the word storage as a new Hash ordered by key.
#
# @param count [Integer, nil] keep only the first +count+ entries after
#   sorting; nil keeps all of them
# @return [Hash] key => stored entry, in ascending key order
def words_sorted_by_alphabet(count = nil)
  sorted = words.sort_by { |key, _entry| key }
  limited = count.nil? ? sorted : sorted.first(count)
  Hash[limited]
end
#words_sorted_by_frequency(count = nil) ⇒ Object
37 38 39 40 41 |
# File 'lib/tomereader/index.rb', line 37
# Returns the word storage as a new Hash ordered by each entry's
# +frequency+, lowest first.
#
# NOTE(review): because the order is ascending, +count+ keeps the least
# frequent entries - confirm that this is what callers expect.
#
# @param count [Integer, nil] keep only the first +count+ entries after
#   sorting; nil keeps all of them
# @return [Hash] key => stored entry, in ascending frequency order
def words_sorted_by_frequency(count = nil)
  sorted = words.sort_by { |_key, entry| entry.frequency }
  limited = count.nil? ? sorted : sorted.first(count)
  Hash[limited]
end