Module: LexicalUnits
- Defined in:
- lib/lexical_units/words.rb,
lib/lexical_units/string.rb,
lib/lexical_units/version.rb,
lib/lexical_units/sentences.rb,
lib/lexical_units/syllables.rb,
lib/lexical_units/words_and_sentences.rb,
lib/lexical_units/words_without_digits.rb
Overview
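Splits text into lexical units (sentences, words, and the words within each sentence), including a variant that splits text into words while excluding digits.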
Defined Under Namespace
Modules: String
Constant Summary collapse
- VERSION =
'0.1.3'
Class Method Summary collapse
-
.sentences(text) ⇒ Object
Split text into sentences.
- .syllables(text) ⇒ Object
-
.words(text) ⇒ Object
Split text into words.
-
.words_and_sentences(text) ⇒ Object
Split text into sentences and each into words.
-
.words_without_digits(text) ⇒ Object
Split text into words without digits.
Class Method Details
.sentences(text) ⇒ Object
Split text into sentences
self.sentences('Lorem, ipsum. Dolor?') #=> ['Lorem, ipsum.', 'Dolor?']
self.sentences('Lorem! Ipsum dolor?') #=> ['Lorem!', 'Ipsum dolor?']
# File 'lib/lexical_units/sentences.rb', line 9
def self.sentences(text)
  separators = LexicalUnits.sentence_separators
  regexp = Regexp.new("[^#{separators}]+[#{separators}]{0,3}")
  text.scan(regexp).map(&:strip)
end
.syllables(text) ⇒ Object
# File 'lib/lexical_units/syllables.rb', line 5
def self.syllables(text)
end
.words(text) ⇒ Object
Split text into words
self.words('Lorem ipsum dolor sit') #=> ['Lorem', 'ipsum', 'dolor', 'sit']
self.words('Lorem, ipsum. Dolor?') #=> ['Lorem', 'ipsum', 'Dolor']
# File 'lib/lexical_units/words.rb', line 9
def self.words(text)
  regexp = Regexp.new("[#{LexicalUnits.separators}]")
  text.gsub(regexp, ' ').split(' ')
end
.words_and_sentences(text) ⇒ Object
Split text into sentences and each into words
self.words_and_sentences('Lorem, ipsum. Dolor?') #=>
[
['Lorem', 'ipsum'],
['Dolor']
]
# File 'lib/lexical_units/words_and_sentences.rb', line 12
def self.words_and_sentences(text)
  LexicalUnits.sentences(text).map do |sentence|
    LexicalUnits.words(sentence)
  end
end
.words_without_digits(text) ⇒ Object
Split text into words without digits
self.words_without_digits('Lorem 0 ipsum') #=> ['Lorem', 'ipsum']
self.words_without_digits('Lorem ipsum 100') #=> ['Lorem', 'ipsum']
# File 'lib/lexical_units/words_without_digits.rb', line 9
def self.words_without_digits(text)
  LexicalUnits.words(text).delete_if { |word| numeric?(word) }
end