Class: Lingua::IT::Readability

Inherits:
Object
  • Object
show all
Defined in:
lib/lingua/it/readability.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text = '', *delimiters) ⇒ Readability

Initialize the sample with text



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/lingua/it/readability.rb', line 23

# Build a readability sample for +text+.
#
# Stores the Lingua::IT::Paragraph, Lingua::IT::Sentence and
# Lingua::IT::Syllable helpers used to split the text, then hands the text
# and delimiters to #analyze, so construction and re-analysis share a single
# code path instead of two copies of the same statements.
#
# @param text [String] the sample to analyze (defaults to the empty string)
# @param delimiters [Array] optional sentence delimiters, forwarded to
#   #analyze; when none are given #analyze resets the sentence delimiter
def initialize(text = '', *delimiters)
  @paragraph = Lingua::IT::Paragraph
  @sentence  = Lingua::IT::Sentence
  @syllable  = Lingua::IT::Syllable

  # The splitters must be assigned before delegating: #analyze uses them.
  analyze(text, *delimiters)
end

Instance Attribute Details

#frequenciesObject

Returns the value of attribute frequencies.



17
18
19
# File 'lib/lingua/it/readability.rb', line 17

# Reader for @frequencies. #initialize and #analyze set it to a Hash whose
# default value is 0 before calling count_words.
def frequencies
  @frequencies
end

#paragraphObject (readonly)

Returns the value of attribute paragraph.



19
20
21
# File 'lib/lingua/it/readability.rb', line 19

# Reader for @paragraph, which #initialize sets to Lingua::IT::Paragraph
# (the object whose +paragraphs+ method splits the text).
def paragraph
  @paragraph
end

#paragraphsObject

Returns the value of attribute paragraphs.



13
14
15
# File 'lib/lingua/it/readability.rb', line 13

# Reader for @paragraphs, the result of calling +paragraphs+ on the paragraph
# splitter with the current text (assigned in #initialize and #analyze).
def paragraphs
  @paragraphs
end

#sentenceObject (readonly)

Returns the value of attribute sentence.



18
19
20
# File 'lib/lingua/it/readability.rb', line 18

# Reader for @sentence, which #initialize sets to Lingua::IT::Sentence
# (the object that holds the sentence delimiter and splits the text).
def sentence
  @sentence
end

#sentencesObject

Returns the value of attribute sentences.



14
15
16
# File 'lib/lingua/it/readability.rb', line 14

# Reader for @sentences, the result of calling +sentences+ on the sentence
# splitter with the current text (assigned in #initialize and #analyze).
def sentences
  @sentences
end

#syllableObject (readonly)

Returns the value of attribute syllable.



20
21
22
# File 'lib/lingua/it/readability.rb', line 20

# Reader for @syllable, which #initialize sets to Lingua::IT::Syllable
# (the object whose +syllables+ method splits the text).
def syllable
  @syllable
end

#syllablesObject

Returns the value of attribute syllables.



15
16
17
# File 'lib/lingua/it/readability.rb', line 15

# Reader for @syllables, the result of calling +syllables+ on the syllable
# splitter with the current text (assigned in #initialize and #analyze).
def syllables
  @syllables
end

#textObject

Returns the value of attribute text.



12
13
14
# File 'lib/lingua/it/readability.rb', line 12

# Reader for @text, the copy (+text.dup+) of the sample stored by
# #initialize and #analyze.
def text
  @text
end

#wordsObject

Returns the value of attribute words.



16
17
18
# File 'lib/lingua/it/readability.rb', line 16

# Reader for @words. #initialize and #analyze reset it to an empty Array
# right before calling count_words.
# NOTE(review): presumably count_words fills it — that method is not shown here.
def words
  @words
end

Instance Method Details

#analyze(text, *delimiters) ⇒ Object

Analyze a text sample with optional delimiters



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/lingua/it/readability.rb', line 46

# Analyze a new text sample, replacing every previously computed value.
#
# @param text [String] the sample to analyze; a duplicate is stored in @text
# @param delimiters [Array] optional sentence delimiters; when present the
#   whole list is passed to the sentence splitter's +delimiter+ method,
#   otherwise the splitter's delimiter is reset
# @return [Object] whatever count_words returns
def analyze(text, *delimiters)
  # Configure the sentence splitter before anything is split.
  if delimiters.empty?
    @sentence.reset_delimiter!
  else
    @sentence.delimiter(delimiters)
  end

  @text        = text.dup
  @paragraphs  = @paragraph.paragraphs(@text)
  @sentences   = @sentence.sentences(@text)
  @words       = []
  @frequencies = Hash.new(0) # words not yet recorded read as 0
  @syllables   = @syllable.syllables(@text)

  count_words
end

#analyze_file(file_name, *delimiters) ⇒ Object

Analyze file content with optional delimiters



64
65
66
67
68
69
70
71
72
73
# File 'lib/lingua/it/readability.rb', line 64

# Analyze the content of a file with optional delimiters.
#
# The whole file is read into memory, stripped of leading and trailing
# whitespace, and passed to #analyze.
#
# @param file_name [String] path of the file to read
# @param delimiters [Array] optional sentence delimiters, forwarded one by
#   one to #analyze
# @return [Object] the result of #analyze
# @raise [RuntimeError] if +file_name+ does not exist
def analyze_file(file_name, *delimiters)
  # File.exists? is deprecated and was removed in Ruby 3.2; File.exist? is
  # the supported spelling.
  raise "An error has occurred: file '#{file_name}' does not exist" unless File.exist?(file_name)

  # Slurp the file into a string.
  text = File.read(file_name).strip

  # Splat the delimiters: passing the array itself would reach #analyze as a
  # single (possibly empty) delimiter and bypass its "no delimiters" branch.
  analyze(text, *delimiters)
end

#fleschObject

Flesch readability index, derived from the U.S. Flesch index and calibrated specifically for Italian text samples. A score below 40 indicates a sample that is hard to read, a score between 40 and 60 a sample of medium readability, and a score above 60 a well-written sample that is easy to read for someone under 16.



153
154
155
156
# File 'lib/lingua/it/readability.rb', line 153

# Flesch score of the sample, rounded to two decimals.
#
# Computed as 206 - 65 * (syllables / words) - (words / sentences), using
# floating-point division throughout.
#
# @return [Float] the score rounded to two decimal places
def flesch
  word_count      = num_words.to_f
  syllable_ratio  = num_syllables.to_f / word_count
  sentence_length = word_count / num_sentences.to_f
  raw_score       = 206.0 - (65.0 * syllable_ratio) - sentence_length

  # Scale, round to an integer, scale back: keeps two decimals.
  (raw_score * 100).round / 100.0
end

#gulpeaseObject

Gulpease readability index, calibrated specifically for Italian text samples. A score below 40 indicates a sample that is hard to read, a score between 40 and 60 a sample of medium readability, and a score above 60 a well-written sample that is easy to read for someone under 16.



144
145
146
# File 'lib/lingua/it/readability.rb', line 144

# Gulpease score of the sample.
#
# Computed as 89 + (300 * sentences - 10 * characters) / words. With Integer
# counts the division is Ruby integer division, so the quotient is floored.
#
# @return [Integer] the score when all three counts are Integers
def gulpease
  sentence_term  = 300 * num_sentences
  character_term = 10 * num_chars

  89 + (sentence_term - character_term) / num_words
end

#num_charsObject Also known as: num_characters

The number of characters in the sample. A character is defined as a single letter, not taking account of punctuation and spaces.



94
95
96
# File 'lib/lingua/it/readability.rb', line 94

# Number of counted characters in the current text.
#
# Works in two passes over a copy of @text (the stored text is not modified):
# 1. every punctuation character that is immediately followed by a whitespace
#    character is removed together with that whitespace;
# 2. the remaining ASCII letters, digits, underscores, the listed accented
#    vowels and round/square/curly brackets are counted, case-insensitively.
#
# @return [Integer] how many characters matched in the second pass
def num_chars
  without_pairs = @text.gsub(/[[:punct:]][[:space:]]/, '')
  counted       = without_pairs.scan(/[a-zA-Z0-9_Èàòèéìù\(\)\[\]\{\}]/i)

  counted.length
end

#num_paragraphsObject

The number of paragraphs in the sample. A paragraph is defined as a newline followed by one or more empty or whitespace-only lines.



82
83
84
# File 'lib/lingua/it/readability.rb', line 82

# Number of paragraphs: the +length+ of @paragraphs, the collection produced
# by the paragraph splitter in #initialize / #analyze.
def num_paragraphs
  @paragraphs.length
end

#num_sentencesObject

The number of sentences in the sample. The meaning of a “sentence” is defined by Lingua::IT::Sentence.



88
89
90
# File 'lib/lingua/it/readability.rb', line 88

# Number of sentences: the +length+ of @sentences, the collection produced
# by the sentence splitter in #initialize / #analyze.
def num_sentences
  @sentences.length
end

#num_syllablesObject

The total number of syllables in the text sample. Syllables are defined in Lingua::IT::Syllable.



108
109
110
# File 'lib/lingua/it/readability.rb', line 108

# Number of syllables: the +length+ of @syllables, the collection produced
# by the syllable splitter in #initialize / #analyze.
def num_syllables
  @syllables.length
end

#num_unique_wordsObject

The number of different unique words used in the text sample.



113
114
115
# File 'lib/lingua/it/readability.rb', line 113

# Number of distinct words: how many keys the @frequencies table holds.
def num_unique_words
  @frequencies.keys.length
end

#num_wordsObject

The number of words in the sample. A word is defined as a sequence of characters, not taking account of punctuation and spaces. See the private method count_words for additional information about the definition of a word.



102
103
104
# File 'lib/lingua/it/readability.rb', line 102

# Number of words: the +length+ of @words.
# NOTE(review): @words is reset to [] before count_words runs; presumably
# count_words appends each word — confirm against that private method.
def num_words
  @words.length
end

#occurrences(word) ⇒ Object

The number of occurrences of the given word in the text sample.



123
124
125
# File 'lib/lingua/it/readability.rb', line 123

# Looks up +word+ in the @frequencies table.
#
# The table built by #initialize / #analyze has a default value of 0, so a
# word that was never recorded yields 0 rather than nil.
#
# @param word [Object] the key to look up (exact match on the stored key)
# @return [Object] the value stored for +word+, or the table's default
def occurrences(word)
  @frequencies[word]
end

#reportObject

A nicely formatted report on the sample, showing most of the useful stats.



160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/lingua/it/readability.rb', line 160

# Formats the main statistics of the sample as a multi-line string.
#
# The first row shows the sentence splitter's +delim_regex+ with every
# backslash removed; the remaining rows show the counts, the two averages and
# the Gulpease and Flesch scores.
#
# @return [String] the formatted report
def report
  layout = [
    "Sentence delimiters            %s \n",
    "Number of paragraphs           %d \n",
    "Number of sentences            %d \n",
    "Number of syllables            %d \n",
    "Number of words                %d \n",
    "Number of characters           %d \n\n",
    "Average words per sentence     %.2f \n",
    "Average syllables per word     %.2f \n\n",
    "Gulpease score                 %d \n",
    "Flesch score                   %2.2f \n"
  ].join

  # Strip the regexp escaping so the delimiters are shown as plain characters.
  shown_delimiters = sentence.delim_regex.gsub(/\\/, '')

  format(layout, shown_delimiters, num_paragraphs, num_sentences,
         num_syllables, num_words, num_characters, words_per_sentence,
         syllables_per_word, gulpease, flesch)
end

#reset_delimiter!Object

Reset Lingua::IT::Sentence symbols delimiter cache



76
77
78
# File 'lib/lingua/it/readability.rb', line 76

# Forwards to +reset_delimiter!+ on the sentence splitter held in @sentence.
#
# @return [Object] whatever the splitter's +reset_delimiter!+ returns
def reset_delimiter!
  @sentence.reset_delimiter!
end

#syllables_per_wordObject

The average number of syllables per word. The syllable count is performed by Lingua::IT::Syllable, and so may not be completely accurate



135
136
137
# File 'lib/lingua/it/readability.rb', line 135

# Average number of syllables per word, rounded to two decimals.
#
# @return [Float] syllable count divided by word count, two decimal places
def syllables_per_word
  syllable_count = @syllables.length.to_f
  word_count     = words.length.to_f
  average        = syllable_count / word_count

  # Scale, round to an integer, scale back: keeps two decimals.
  (average * 100).round / 100.0
end

#unique_wordsObject

An array containing each unique word used in the text sample.



118
119
120
# File 'lib/lingua/it/readability.rb', line 118

# The distinct words seen so far: the keys of the @frequencies table.
#
# @return [Array] the table's keys
def unique_words
  @frequencies.keys
end

#words_per_sentenceObject

The average number of words per sentence.



128
129
130
# File 'lib/lingua/it/readability.rb', line 128

# Average number of words per sentence, rounded to two decimals.
#
# @return [Float] word count divided by sentence count, two decimal places
def words_per_sentence
  word_count     = words.length.to_f
  sentence_count = sentences.length.to_f
  average        = word_count / sentence_count

  # Scale, round to an integer, scale back: keeps two decimals.
  (average * 100).round / 100.0
end