Class: Tomereader::Index

Inherits:
Object
  • Object
show all
Defined in:
lib/tomereader/index.rb

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Index

include Settings



6
7
8
9
10
11
12
13
14
15
# File 'lib/tomereader/index.rb', line 6

# Builds an empty index: no words, no phrases, plus the patterns used to cut
# text into phrases and to recognise words.
#
# No content is accepted here; text is validated and indexed through #add.
def initialize
  # Every word string encountered so far (duplicates kept) and every phrase indexed.
  @total_words = []
  @phrase_storage = []
  # Unique words, keyed by their string form.
  @word_storage = {}
  # A phrase ends at a full stop or a semicolon.
  @phrase_split_pattern = /[\.\;]/
  # A word is an ASCII letter followed by any run of letters, apostrophes or hyphens.
  @word_pattern = /[A-Za-z]([A-Za-z\'\-])*/
end

Instance Method Details

#add(content) ⇒ Object

Raises:

  • (ArgumentError)


16
17
18
19
20
21
22
# File 'lib/tomereader/index.rb', line 16

# Indexes a piece of text by handing it to #split, which records its phrases
# and words in this index.
#
# @param content [String] the text to index; must not be empty
# @return [self] the index itself, so calls can be chained
# @raise [ArgumentError] if +content+ is not a String
# @raise [StandardError] if +content+ is an empty String
def add(content)
  raise ArgumentError, "Content must be a String - #{content.class} given instead" unless content.is_a?(String)
  raise StandardError, "Content is empty" if content.empty?

  # #split performs the phrase splitting itself, so the text is not pre-split here.
  split(content)
  self
end

#check(word_string) ⇒ Object



72
73
74
# File 'lib/tomereader/index.rb', line 72

# Tells whether +word_string+ may be stored in the index.
#
# @param word_string [Object] candidate word
# @return [Object] +false+ for anything that is not a String; otherwise
#   whatever #suitable? returns for the string
def check(word_string)
  return false unless word_string.is_a?(String)

  suitable?(word_string)
end

#create(word_string) ⇒ Object



67
68
69
70
71
# File 'lib/tomereader/index.rb', line 67

# Creates a Word for +word_string+ and stores it under that string, replacing
# any entry already stored for it.
#
# @param word_string [Object] candidate word
# @return [Word, nil] the newly stored Word, or nil when #check rejects the input
def create(word_string)
  return unless check(word_string)

  @word_storage[word_string] = Word.new(word_string)
end

#find(word_string) ⇒ Object



62
63
64
65
66
# File 'lib/tomereader/index.rb', line 62

# Looks up an already indexed word.
#
# @param word_string [Object] key to look up in the word storage
# @return [Object, nil] the stored entry, or nil when the key is absent
def find(word_string)
  # fetch with an explicit nil fallback only answers for keys that are really present.
  @word_storage.fetch(word_string, nil)
end

#find_or_create(word_string) ⇒ Object



75
76
77
# File 'lib/tomereader/index.rb', line 75

# Returns the entry already stored for +word_string+, creating one only
# when #find comes back empty.
#
# @param word_string [Object] word to look up or create
# @return [Object, nil] the result of #find when truthy, otherwise the result of #create
def find_or_create(word_string)
  existing = find(word_string)
  return existing if existing

  create(word_string)
end

#phrases ⇒ Object



29
30
31
# File 'lib/tomereader/index.rb', line 29

# Returns the phrase storage of this index.
#
# @return [Array] the array instance held by the index itself, not a copy
def phrases
  @phrase_storage
end

#split(content) ⇒ Object

Splits the text into phrases, extracts the words, and establishes the links: phrase -> words, and word -> phrases.



47
48
49
50
51
52
53
54
55
56
57
# File 'lib/tomereader/index.rb', line 47

# Cuts +content+ into phrases, walks the words of each phrase and records,
# for every word accepted by #find_or_create, the phrase and position at
# which it occurred.
#
# @param content [String] text to index
# @return [Array] one element per phrase; each element is the phrase storage
#   itself, since that is what appending to it returns
def split(content)
  split_into_phrases(content).map do |phrase_text|
    current_phrase = Phrase.new(phrase_text)
    current_phrase.split do |token, index|
      # Every token is counted, even one that is not accepted as a word.
      @total_words << token
      entry = find_or_create(token)
      entry.add(current_phrase, index) if entry.is_a?(Word)
    end
    @phrase_storage.push(current_phrase)
  end
end

#split_into_phrases(content) ⇒ Object



23
24
25
# File 'lib/tomereader/index.rb', line 23

# Cuts +content+ wherever the phrase split pattern matches.
#
# @param content [String] text to cut
# @return [Array<String>] the pieces between separators, in order
def split_into_phrases(content)
  content.split(@phrase_split_pattern)
end

#suitable?(word_string) ⇒ Boolean

word word_storage

Returns:

  • (Boolean)


59
60
61
# File 'lib/tomereader/index.rb', line 59

# Tests +word_string+ against the word pattern.
#
# The pattern is not anchored, so a match anywhere in the string counts.
#
# @param word_string [String] candidate word
# @return [Integer, nil] index of the first match (truthy), or nil when nothing matches
def suitable?(word_string)
  word_string =~ @word_pattern
end

#to_s ⇒ Object



42
43
44
# File 'lib/tomereader/index.rb', line 42

def to_s
  {total: @total_words.count, unique_count: @word_storage.count, phrases: @phrase_storage.count}
end

#words ⇒ Object



26
27
28
# File 'lib/tomereader/index.rb', line 26

# Returns the word storage of this index.
#
# @return [Hash] the hash instance held by the index itself, not a copy;
#   entries are stored under their word string
def words
  @word_storage
end

#words_sorted_by_alphabet(count = nil) ⇒ Object



32
33
34
35
36
# File 'lib/tomereader/index.rb', line 32

# Returns the indexed words ordered by their key.
#
# @param count [Integer, nil] keep only the first +count+ entries; nil keeps all
# @return [Hash] a new hash whose insertion order follows the sorted keys
def words_sorted_by_alphabet(count = nil)
  ordered = words.sort_by { |word_string, _entry| word_string }
  limited = count.nil? ? ordered : ordered.first(count)
  Hash[limited]
end

#words_sorted_by_frequency(count = nil) ⇒ Object



37
38
39
40
41
# File 'lib/tomereader/index.rb', line 37

# Returns the indexed words ordered by the +frequency+ of their entries.
#
# NOTE(review): the order is ascending, so a +count+ keeps the entries with
# the lowest frequency — confirm that this is what callers want.
#
# @param count [Integer, nil] keep only the first +count+ entries; nil keeps all
# @return [Hash] a new hash whose insertion order follows the sorted frequencies
def words_sorted_by_frequency(count = nil)
  ordered = words.sort_by { |_word_string, entry| entry.frequency }
  limited = count.nil? ? ordered : ordered.first(count)
  Hash[limited]
end