Module: LexicalUnits

Defined in:
lib/lexical_units/words.rb,
lib/lexical_units/string.rb,
lib/lexical_units/version.rb,
lib/lexical_units/sentences.rb,
lib/lexical_units/syllables.rb,
lib/lexical_units/words_and_sentences.rb,
lib/lexical_units/words_without_digits.rb

Overview

Part of LexicalUnits that splits text into words, excluding digits

Defined Under Namespace

Modules: String

Constant Summary collapse

VERSION =
'0.1.3'

Class Method Summary collapse

Class Method Details

.sentences(text) ⇒ Object

Split text into sentences

self.sentences('Lorem, ipsum. Dolor?') #=> ['Lorem, ipsum.', 'Dolor?']
self.sentences('Lorem! Ipsum dolor?') #=> ['Lorem!', 'Ipsum dolor?']


9
10
11
12
13
# File 'lib/lexical_units/sentences.rb', line 9

# Split text into sentences.
#
# A sentence is a run of non-separator characters followed by up to three
# separator characters (so an ellipsis such as "..." stays attached).
# Each match is stripped of surrounding whitespace.
#
# @param text [String] text to split
# @return [Array<String>] the sentences, in order of appearance
def self.sentences(text)
  # NOTE(review): the separators are interpolated straight into a character
  # class, so this presumably expects a regexp-safe list of characters;
  # confirm against LexicalUnits.sentence_separators.
  separators = LexicalUnits.sentence_separators
  regexp = Regexp.new("[^#{separators}]+[#{separators}]{0,3}")
  # Whitespace-only segments (e.g. a trailing space or newline after the
  # final separator) match the pattern but strip down to '', so drop them
  # instead of reporting empty sentences.
  text.scan(regexp).map(&:strip).reject(&:empty?)
end

.syllables(text) ⇒ Object



5
6
# File 'lib/lexical_units/syllables.rb', line 5

# Placeholder: no syllable splitting is implemented here.
# The argument is ignored and the method always returns nil.
#
# @param text [String] currently unused
# @return [nil]
def self.syllables(text)
  nil
end

.words(text) ⇒ Object

Split text into words

self.words('Lorem ipsum dolor sit') #=> ['Lorem', 'ipsum', 'dolor', 'sit']
self.words('Lorem, ipsum. Dolor?') #=> ['Lorem', 'ipsum', 'Dolor']


9
10
11
12
# File 'lib/lexical_units/words.rb', line 9

# Split text into words.
#
# Every character matched by the separator character class is replaced
# with a space, then the result is split on whitespace.
#
# @param text [String] text to split
# @return [Array<String>] the words, in order of appearance
def self.words(text)
  # Character class built from LexicalUnits.separators (defined elsewhere).
  separator_class = Regexp.new("[#{LexicalUnits.separators}]")
  spaced = text.gsub(separator_class, ' ')
  # split(' ') collapses runs of whitespace and ignores leading/trailing gaps.
  spaced.split(' ')
end

.words_and_sentences(text) ⇒ Object

Split text into sentences and each into words

self.words_and_sentences('Lorem, ipsum. Dolor?') #=>
[
  ['Lorem', 'ipsum'],
  ['Dolor']
]


12
13
14
15
16
# File 'lib/lexical_units/words_and_sentences.rb', line 12

# Split text into sentences, then split each sentence into words.
#
# @param text [String] text to split
# @return [Array<Array<String>>] one array of words per sentence
def self.words_and_sentences(text)
  sentence_list = LexicalUnits.sentences(text)
  sentence_list.map { |chunk| LexicalUnits.words(chunk) }
end

.words_without_digits(text) ⇒ Object

Split text into words without digits

self.words_without_digits('Lorem 0 ipsum') #=> ['Lorem', 'ipsum']
self.words_without_digits('Lorem ipsum 100') #=> ['Lorem', 'ipsum']


9
10
11
# File 'lib/lexical_units/words_without_digits.rb', line 9

# Split text into words and discard every word for which numeric? is true.
#
# @param text [String] text to split
# @return [Array<String>] the remaining words, in order of appearance
def self.words_without_digits(text)
  tokens = LexicalUnits.words(text)
  # numeric? is a helper defined outside this method; delete_if filters the
  # array returned by LexicalUnits.words in place and returns it.
  tokens.delete_if { |token| numeric?(token) }
end