Class: Autotag::Extractor::Textblock

Inherits:
Object
  • Object
show all
Defined in:
lib/autotag/extractor/document/textblock.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(str, charsize, wordsize) ⇒ Textblock

size, HTML data



6
7
8
9
10
11
12
# File 'lib/autotag/extractor/document/textblock.rb', line 6

# Builds a text block from a raw string plus two externally supplied sizes.
#
# The string is tokenized into words on runs of whitespace. In addition to
# the Unicode separator category (\p{Z}: spaces, no-break spaces, line and
# paragraph separators), ASCII whitespace such as tab, newline, carriage
# return and form feed is treated as a separator, so text that wraps
# across lines is not glued together into a single "word".
#
# @param str [String] text to split into words
# @param charsize [Object] stored as-is; converted with to_f as the divisor in #ratio
# @param wordsize [Object] stored as-is; converted with to_f as the dividend in #ratio
def initialize(str, charsize, wordsize)
  @charsize = charsize
  @wordsize = wordsize
  # A leading separator makes split yield an empty first element; drop it
  # so that @words only holds blocks of non-whitespace characters.
  @words = str.split(/[\p{Z}\s]+/).reject(&:empty?)
  @size = @words.size
end

Instance Attribute Details

#size ⇒ Object (readonly)

Returns the value of attribute size.



3
4
5
# File 'lib/autotag/extractor/document/textblock.rb', line 3

# Reader for the word count of this text block.
#
# @return [Integer] the value of @size, which the constructor sets to the
#   number of words obtained by splitting the input string
def size
  @size
end

#words ⇒ Object (readonly)

Returns the value of attribute words.



19
20
21
# File 'lib/autotag/extractor/document/textblock.rb', line 19

# Reader for the words of this text block.
#
# @return [Array<String>] the value of @words, i.e. the non-empty tokens
#   the constructor obtained by splitting the input string
def words
  @words
end

Instance Method Details

#[](index) ⇒ Object



29
30
31
# File 'lib/autotag/extractor/document/textblock.rb', line 29

# Index into the word list of this text block.
#
# The argument is handed straight to Array#[], so whatever that method
# accepts for a single argument works here and out-of-range lookups
# yield nil.
#
# @param index [Object] index passed through to Array#[]
# @return [Object] whatever @words[index] returns
def [](index)
  @words[index]
end

#plaintext ⇒ Object



25
26
27
# File 'lib/autotag/extractor/document/textblock.rb', line 25

# Rebuilds a plain-text rendering of the block from its words.
#
# The words are joined with exactly one space, so the original spacing
# of the input string is not preserved.
#
# @return [String] the words of @words separated by single spaces
def plaintext
  @words.join(' ')
end

#ratio ⇒ Object



14
15
16
# File 'lib/autotag/extractor/document/textblock.rb', line 14

# Quotient of the stored word size and the stored character size.
#
# Both values are converted with to_f before dividing, so the division is
# always a floating-point one; a zero character size therefore produces
# Infinity or NaN rather than raising.
#
# @return [Float] @wordsize divided by @charsize
def ratio
  dividend = @wordsize.to_f
  divisor = @charsize.to_f
  dividend / divisor
end