Class: Lingua::IT::Readability

Inherits:

Object

Object
Lingua::IT::Readability

show all

Defined in: lib/lingua/it/readability.rb

Instance Attribute Summary collapse

#frequencies ⇒ Object

Returns the value of attribute frequencies.
#paragraph ⇒ Object readonly

Returns the value of attribute paragraph.
#paragraphs ⇒ Object

Returns the value of attribute paragraphs.
#sentence ⇒ Object readonly

Returns the value of attribute sentence.
#sentences ⇒ Object

Returns the value of attribute sentences.
#syllable ⇒ Object readonly

Returns the value of attribute syllable.
#syllables ⇒ Object

Returns the value of attribute syllables.
#text ⇒ Object

Returns the value of attribute text.
#words ⇒ Object

Returns the value of attribute words.

Instance Method Summary collapse

#analyze(text, *delimiters) ⇒ Object

Analyze a text sample with optional delimiters.
#analyze_file(file_name, *delimiters) ⇒ Object

Analyze file content with optional delimiters.
#flesch ⇒ Object

Flesch readability index calibrated for Italian text samples, derived from the U.S. Flesch index.
#gulpease ⇒ Object

Gulpease index of readability expressly calibrated to suit italian text samples.
#initialize(text = '', *delimiters) ⇒ Readability constructor

Initialize the sample with text.
#num_chars ⇒ Object (also: #num_characters)

The number of characters in the sample.
#num_paragraphs ⇒ Object

The number of paragraphs in the sample.
#num_sentences ⇒ Object

The number of sentences in the sample.
#num_syllables ⇒ Object

The total number of syllables in the text sample.
#num_unique_words ⇒ Object

The number of different unique words used in the text sample.
#num_words ⇒ Object

The number of words in the sample.
#occurrences(word) ⇒ Object

The number of occurrences of the given word in the text sample.
#report ⇒ Object

A nicely formatted report on the sample, showing most of the useful stats.
#reset_delimiter! ⇒ Object

Reset Lingua::IT::Sentence symbols delimiter cache.
#syllables_per_word ⇒ Object

The average number of syllables per word.
#unique_words ⇒ Object

An array containing each unique word used in the text sample.
#words_per_sentence ⇒ Object

The average number of words per sentence.

Constructor Details

#initialize(text = '', *delimiters) ⇒ `Readability`

Initialize the sample with text

# File 'lib/lingua/it/readability.rb', line 23

# Build a readability sample for +text+.
#
# Stores the Lingua::IT helper classes used for paragraph, sentence and
# syllable splitting, then delegates to #analyze so that construction and
# re-analysis share a single code path instead of two copies of it.
#
# @param text [String] the sample to analyze (defaults to the empty string)
# @param delimiters [Array] optional sentence delimiters, forwarded to
#   #analyze; when none are given the sentence delimiters are reset
def initialize(text = '', *delimiters)
  @paragraph = Lingua::IT::Paragraph
  @sentence  = Lingua::IT::Sentence
  @syllable  = Lingua::IT::Syllable

  # The helper classes must be assigned before analyzing: #analyze calls them.
  analyze(text, *delimiters)
end

Instance Attribute Details

#frequencies ⇒ `Object`

Returns the value of attribute frequencies.



17
18
19

# File 'lib/lingua/it/readability.rb', line 17

# Reader for @frequencies.
#
# #initialize and #analyze reset it to an empty Hash whose default value is 0.
# NOTE(review): presumably populated by the private count_words helper, which
# is not visible here — confirm.
def frequencies
  @frequencies
end

#paragraph ⇒ `Object` (readonly)

Returns the value of attribute paragraph.



19
20
21

# File 'lib/lingua/it/readability.rb', line 19

# Reader for @paragraph, which #initialize sets to the Lingua::IT::Paragraph
# class (the object asked to split the text into paragraphs).
def paragraph
  @paragraph
end

#paragraphs ⇒ `Object`

Returns the value of attribute paragraphs.



13
14
15

# File 'lib/lingua/it/readability.rb', line 13

# Reader for @paragraphs: whatever @paragraph.paragraphs returned for the
# text most recently passed to #initialize or #analyze.
def paragraphs
  @paragraphs
end

#sentence ⇒ `Object` (readonly)

Returns the value of attribute sentence.



18
19
20

# File 'lib/lingua/it/readability.rb', line 18

# Reader for @sentence, which #initialize sets to the Lingua::IT::Sentence
# class (used both to configure delimiters and to split sentences).
def sentence
  @sentence
end

#sentences ⇒ `Object`

Returns the value of attribute sentences.



14
15
16

# File 'lib/lingua/it/readability.rb', line 14

# Reader for @sentences: whatever @sentence.sentences returned for the text
# most recently passed to #initialize or #analyze.
def sentences
  @sentences
end

#syllable ⇒ `Object` (readonly)

Returns the value of attribute syllable.



20
21
22

# File 'lib/lingua/it/readability.rb', line 20

# Reader for @syllable, which #initialize sets to the Lingua::IT::Syllable
# class (the object asked to split the text into syllables).
def syllable
  @syllable
end

#syllables ⇒ `Object`

Returns the value of attribute syllables.



15
16
17

# File 'lib/lingua/it/readability.rb', line 15

# Reader for @syllables: whatever @syllable.syllables returned for the text
# most recently passed to #initialize or #analyze.
def syllables
  @syllables
end

#text ⇒ `Object`

Returns the value of attribute text.



12
13
14

# File 'lib/lingua/it/readability.rb', line 12

# Reader for @text: the copy (made with +dup+) of the text most recently
# passed to #initialize or #analyze.
def text
  @text
end

#words ⇒ `Object`

Returns the value of attribute words.



16
17
18

# File 'lib/lingua/it/readability.rb', line 16

# Reader for @words.
#
# #initialize and #analyze reset it to an empty Array right before calling
# count_words.
# NOTE(review): presumably count_words fills it with the words of the sample —
# that helper is not visible here, confirm.
def words
  @words
end

Instance Method Details

#analyze(text, *delimiters) ⇒ `Object`

Analyze a text sample with optional delimiters

# File 'lib/lingua/it/readability.rb', line 46

# Replace the current sample with +text+ and recompute every statistic.
#
# @param text [String] the new sample; a copy is stored, not the argument
# @param delimiters [Array] optional sentence delimiters; when omitted the
#   sentence splitter's delimiters are reset
# @return whatever the count_words helper returns
def analyze(text, *delimiters)
  # Configure the sentence splitter first: sentence splitting below uses it.
  if delimiters.empty?
    @sentence.reset_delimiter!
  else
    @sentence.delimiter(delimiters)
  end

  # Work on a private copy; +self.text+ is needed because the bare name
  # +text+ refers to the parameter inside this method.
  @text = text.dup
  @paragraphs = @paragraph.paragraphs(self.text)
  @sentences = @sentence.sentences(self.text)

  # Start from empty word statistics; unknown words count as 0 occurrences.
  @words = []
  @frequencies = Hash.new(0)
  @syllables = @syllable.syllables(self.text)

  count_words
end

#analyze_file(file_name, *delimiters) ⇒ `Object`

Analyze file content with optional delimiters

# File 'lib/lingua/it/readability.rb', line 64

# Read +file_name+ and analyze its content.
#
# The file content is stripped of leading and trailing whitespace before
# being handed to #analyze.
#
# @param file_name [String] path of the file to read
# @param delimiters [Array] optional sentence delimiters, forwarded
#   individually to #analyze
# @return whatever #analyze returns
# @raise [RuntimeError] when +file_name+ does not exist
def analyze_file(file_name, *delimiters)
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  raise "An error has occured" unless File.exist?(file_name)

  # Splat the delimiters: passing the array itself would reach #analyze as a
  # single (never empty) argument, so the delimiters would never be reset.
  analyze(File.read(file_name).strip, *delimiters)
end

#flesch ⇒ `Object`

Flesch readability index calibrated for Italian text samples, derived from the U.S. Flesch index. A score below 40 indicates a sample that is hard to read, a score between 40 and 60 a sample of medium readability, and a score above 60 a well-written sample that is easily readable by someone under 16.

# File 'lib/lingua/it/readability.rb', line 153

# Flesch score for the sample, rounded to two decimal places:
#
#   206 - 65 * (syllables / words) - (words / sentences)
#
# All divisions are floating point, so a sample with no words or no
# sentences produces NaN/Infinity and the final +round+ raises
# FloatDomainError.
#
# @return [Float]
def flesch
  syllables_each_word = num_syllables.to_f / num_words.to_f
  words_each_sentence = num_words.to_f / num_sentences.to_f
  raw_score = 206.0 - (65.0 * syllables_each_word) - words_each_sentence

  # Scale, round to an integer and scale back: keeps two decimals.
  (raw_score * 100).round / 100.0
end

#gulpease ⇒ `Object`

Gulpease readability index calibrated for Italian text samples. A score below 40 indicates a sample that is hard to read, a score between 40 and 60 a sample of medium readability, and a score above 60 a well-written sample that is easily readable by someone under 16.



144
145
146

# File 'lib/lingua/it/readability.rb', line 144

# Gulpease score for the sample:
#
#   89 + (300 * sentences - 10 * characters) / words
#
# The counts are Integers, so the division is Ruby integer division (it
# rounds towards negative infinity) and a sample with no words raises
# ZeroDivisionError.
#
# @return [Integer]
def gulpease
  sentence_term = 300 * num_sentences
  character_term = 10 * num_chars
  adjustment = (sentence_term - character_term) / num_words

  89 + adjustment
end

#num_chars ⇒ `Object` Also known as: num_characters

The number of characters in the sample. A character is defined as a single letter, not taking account of punctuation and spaces.



94
95
96

# File 'lib/lingua/it/readability.rb', line 94

# Count the characters of the sample.
#
# Works in two passes over a copy of @text (the stored text is not modified):
# 1. every punctuation character that is immediately followed by a whitespace
#    character is removed together with that whitespace;
# 2. the remaining characters matching the allowed set (ASCII letters, digits,
#    underscore, the listed accented vowels and round/square/curly brackets,
#    case-insensitively) are counted.
#
# NOTE(review): the public description says punctuation is ignored, yet
# brackets and underscore are counted unless a whitespace follows them, and
# only some accented vowels are listed — confirm the intended definition.
#
# @return [Integer]
def num_chars
  without_trailing_punct = @text.gsub(/[[:punct:]][[:space:]]/, '')
  counted = without_trailing_punct.scan(/[a-zA-Z0-9_Èàòèéìù\(\)\[\]\{\}]/i)
  counted.length
end

#num_paragraphs ⇒ `Object`

The number of paragraphs in the sample. A paragraph is defined as a newline followed by one or more empty or whitespace-only lines.



82
83
84

# File 'lib/lingua/it/readability.rb', line 82

# Number of paragraphs: the length of @paragraphs, i.e. of the collection
# produced by @paragraph.paragraphs in #initialize / #analyze.
def num_paragraphs
  @paragraphs.length
end

#num_sentences ⇒ `Object`

The number of sentences in the sample. The meaning of a “sentence” is defined by Lingua::IT::Sentence.



88
89
90

# File 'lib/lingua/it/readability.rb', line 88

# Number of sentences: the length of @sentences, i.e. of the collection
# produced by @sentence.sentences in #initialize / #analyze.
def num_sentences
  @sentences.length
end

#num_syllables ⇒ `Object`

The total number of syllables in the text sample. Syllables are defined in Lingua::IT::Syllable.



108
109
110

# File 'lib/lingua/it/readability.rb', line 108

# Number of syllables: the length of @syllables, i.e. of the collection
# produced by @syllable.syllables in #initialize / #analyze.
def num_syllables
  @syllables.length
end

#num_unique_words ⇒ `Object`

The number of different unique words used in the text sample.



113
114
115

# File 'lib/lingua/it/readability.rb', line 113

# Number of distinct words: the number of keys in the @frequencies hash.
def num_unique_words
  @frequencies.keys.length
end

#num_words ⇒ `Object`

The number of words in the sample. A word is defined as a sequence of characters, not taking account of punctuation and spaces, see private method count_words for additional info about a word definition



102
103
104

# File 'lib/lingua/it/readability.rb', line 102

# Number of words: the length of @words.
# NOTE(review): @words is presumably filled by the private count_words helper,
# which is not visible here — confirm.
def num_words
  @words.length
end

#occurrences(word) ⇒ `Object`

The number of occurrences of the given word in the text sample.



123
124
125

# File 'lib/lingua/it/readability.rb', line 123

# Look up +word+ in the @frequencies hash.
#
# #initialize and #analyze give that hash a default value of 0, so a word
# that is not a key yields 0 rather than nil.
# NOTE(review): the lookup is an exact key match; whether keys are
# case-normalized depends on count_words, which is not visible here — confirm.
def occurrences(word)
  @frequencies[word]
end

#report ⇒ `Object`

A nicely formatted report on the sample, showing most of the useful stats.

# File 'lib/lingua/it/readability.rb', line 160

# Render the main statistics of the sample as a multi-line String.
#
# The first row shows the sentence delimiters: +sentence.delim_regex+ with
# every backslash removed. Counts are printed as integers, the two averages
# and the Flesch score with two decimals.
#
# @return [String]
def report
  layout = [
    "Sentence delimiters            %s \n",
    "Number of paragraphs           %d \n",
    "Number of sentences            %d \n",
    "Number of syllables            %d \n",
    "Number of words                %d \n",
    "Number of characters           %d \n\n",
    "Average words per sentence     %.2f \n",
    "Average syllables per word     %.2f \n\n",
    "Gulpease score                 %d \n",
    "Flesch score                   %2.2f \n"
  ].join

  # One value per placeholder, in the same order as the rows above.
  values = [
    sentence.delim_regex.gsub(/\\/,''),
    num_paragraphs,
    num_sentences,
    num_syllables,
    num_words,
    num_characters,
    words_per_sentence,
    syllables_per_word,
    gulpease,
    flesch
  ]

  format(layout, *values)
end

#reset_delimiter! ⇒ `Object`

Reset Lingua::IT::Sentence symbols delimiter cache



76
77
78

# File 'lib/lingua/it/readability.rb', line 76

# Delegate to @sentence.reset_delimiter! (@sentence is the
# Lingua::IT::Sentence class, as assigned in #initialize).
# NOTE(review): because @sentence is a class rather than a per-sample object,
# this presumably affects state shared by every sample — confirm.
def reset_delimiter!
  @sentence.reset_delimiter!
end

#syllables_per_word ⇒ `Object`

The average number of syllables per word. The syllable count is performed by Lingua::IT::Syllable, and so may not be completely accurate



135
136
137

# File 'lib/lingua/it/readability.rb', line 135

# Average number of syllables per word, rounded to two decimal places.
#
# Returns 0.0 for a sample without words (for instance the constructor's
# default empty text). Without this guard the division yields NaN and
# rounding it raises FloatDomainError.
#
# @return [Float]
def syllables_per_word
  word_count = words.length
  return 0.0 if word_count.zero?

  # Scale, round to an integer and scale back: keeps two decimals.
  ((@syllables.length.to_f / word_count) * 100).round / 100.0
end

#unique_words ⇒ `Object`

An array containing each unique word used in the text sample.



118
119
120

# File 'lib/lingua/it/readability.rb', line 118

# The distinct words of the sample: the keys of the @frequencies hash.
def unique_words
  @frequencies.keys
end

#words_per_sentence ⇒ `Object`

The average number of words per sentence.



128
129
130

# File 'lib/lingua/it/readability.rb', line 128

# Average number of words per sentence, rounded to two decimal places.
#
# Returns 0.0 for a sample without sentences (for instance the constructor's
# default empty text). Without this guard the division yields NaN or
# Infinity and rounding it raises FloatDomainError.
#
# @return [Float]
def words_per_sentence
  sentence_count = sentences.length
  return 0.0 if sentence_count.zero?

  # Scale, round to an integer and scale back: keeps two decimals.
  ((words.length.to_f / sentence_count) * 100).round / 100.0
end

Class: Lingua::IT::Readability

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text = '', *delimiters) ⇒ Readability

Instance Attribute Details

#frequencies ⇒ Object

#paragraph ⇒ Object (readonly)

#paragraphs ⇒ Object

#sentence ⇒ Object (readonly)

#sentences ⇒ Object

#syllable ⇒ Object (readonly)

#syllables ⇒ Object

#text ⇒ Object

#words ⇒ Object

Instance Method Details

#analyze(text, *delimiters) ⇒ Object

#analyze_file(file_name, *delimiters) ⇒ Object

#flesch ⇒ Object

#gulpease ⇒ Object

#num_chars ⇒ Object Also known as: num_characters

#num_paragraphs ⇒ Object

#num_sentences ⇒ Object

#num_syllables ⇒ Object

#num_unique_words ⇒ Object

#num_words ⇒ Object

#occurrences(word) ⇒ Object

#report ⇒ Object

#reset_delimiter! ⇒ Object

#syllables_per_word ⇒ Object

#unique_words ⇒ Object

#words_per_sentence ⇒ Object