Class: AnyStyle::Feature::Words
- Inherits: AnyStyle::Feature
- Inheritance chain: Object → AnyStyle::Feature → AnyStyle::Feature::Words
- Defined in: lib/anystyle/feature/words.rb
Constant Summary collapse
# Canonized words that #observe counts as section-title vocabulary.
# Frozen so the shared list cannot be mutated by accident at runtime.
TITLE_WORDS = %w[
  abstract acknowledgements appendix bibliography bibliographie chapter
  cited contents figures introduction literatur literature references
  referenzen section tables works
].freeze
Instance Attribute Summary collapse
-
#dictionary ⇒ Object
readonly
Returns the value of attribute dictionary.
Attributes inherited from AnyStyle::Feature
Instance Method Summary collapse
- #classify(word) ⇒ Object
-
#initialize(dictionary:, **opts) ⇒ Words
constructor
A new instance of Words.
- #observe(token, **opts) ⇒ Object
Methods inherited from AnyStyle::Feature
Methods included from StringUtils
canonize, count, display_chars, display_width, indent, page_break?, scrub, transliterate
Constructor Details
#initialize(dictionary:, **opts) ⇒ Words
Returns a new instance of Words.
26 27 28 29 |
# File 'lib/anystyle/feature/words.rb', line 26
# Builds a Words feature around the supplied dictionary.
#
# @param dictionary [Object] kept as-is in @dictionary
# @param opts [Hash] every other keyword argument; passed on unchanged to
#   the superclass initializer
def initialize(dictionary:, **opts)
  # The parent initializer runs first, then the dictionary is stored.
  super(**opts)
  @dictionary = dictionary
end
Instance Attribute Details
#dictionary ⇒ Object (readonly)
Returns the value of attribute dictionary.
4 5 6 |
# File 'lib/anystyle/feature/words.rb', line 4
# Read-only accessor.
#
# @return [Object] the current value of the @dictionary instance variable
def dictionary
  @dictionary
end
Instance Method Details
#classify(word) ⇒ Object
60 61 62 63 64 65 66 67 68 69 70 71 |
# File 'lib/anystyle/feature/words.rb', line 60
# Sorts a word into one of four coarse categories.
#
# @param word [String, nil] the word to inspect
# @return [Symbol]
#   :none    when +word+ is nil;
#   :number  when the whole string is digits only, one of the roman numerals
#            ii, iii, iv, v, vii, viii, ix, x, xii, xiii (case-insensitive),
#            or the empty string;
#   :numeric when the string contains a digit anywhere else;
#   :alpha   for everything else.
def classify(word)
  case word
  when nil
    :none
  # \A and \z anchor the entire string. The line anchors ^ and $ would also
  # accept multi-line input in which only one line is a number ("abc\n12").
  when /\A(\d+|[vx]?iii?|i?[vx]|)\z/i
    :number
  when /\d/
    :numeric
  else
    :alpha
  end
end
#observe(token, **opts) ⇒ Object
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/anystyle/feature/words.rb', line 31
# Computes the word-level observations for +token+.
#
# The token is split on whitespace, each piece is passed through +canonize+,
# and pieces that come back empty are dropped; the remaining words feed the
# statistics below.
#
# @param token [String] the text to analyse
# @param opts [Hash] accepted but not used by this method
# @return [Array] in order: number of words, mean word length, median word
#   length (both computed with `/`, so presumably truncated integers),
#   number of +\S\s\s+\S+ matches in the raw token, #classify of the first
#   word, number of +\d+(\.\d+)?+ matches in the raw token, ratio of
#   TITLE_WORDS hits to words, followed by one ratio per entry returned by
#   +dictionary.tag_counts+
def observe(token, **opts)
  words = token.scan(/\S+/).map { |raw| canonize raw }.reject(&:empty?)
  total = words.length

  spacer_count = token.scan(/\S\s\s+\S/).length
  number_count = token.scan(/\d+(\.\d+)?/).length
  title_count = words.count { |w| TITLE_WORDS.include?(w) }
  tag_counts = dictionary.tag_counts(words)

  if words.empty?
    mean = 0
    median = 0
  else
    sizes = words.map(&:length).sort
    mid = sizes.length / 2
    mean = sizes.sum / sizes.length
    # Odd count: the middle element. Even count: mean of the two middle ones.
    median = sizes.length.odd? ? sizes[mid] : (sizes[mid - 1] + sizes[mid]) / 2
  end

  [
    total,
    mean,
    median,
    spacer_count,
    classify(words.first),
    number_count,
    ratio(title_count, total),
    *tag_counts.map { |count| ratio(count, total) }
  ]
end