Class: PragmaticSegmenter::Segmenter

Inherits:
Object
  • Object
show all
Includes:
LanguageSupport
Defined in:
lib/pragmatic_segmenter/segmenter.rb

Overview

This class segments a text into an array of sentences.

Constant Summary

Constants included from LanguageSupport

LanguageSupport::LANGUAGE_CODES

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from LanguageSupport

#cleaner_class, #process_class

Constructor Details

#initialize(text:, **args) ⇒ Segmenter

Returns a new instance of Segmenter.



32
33
34
35
36
37
38
39
40
# File 'lib/pragmatic_segmenter/segmenter.rb', line 32

# Prepares a segmenter for +text+.
#
# When +text+ is nil (or false) nothing is initialised, so every reader
# on the instance returns nil.
#
# @param text [String, nil] text to segment; a duplicate is stored, not the
#   caller's object
# @param args [Hash] optional settings
#   - :language -- language code; 'en' when absent or falsy
#   - :doc_type -- stored and forwarded to the cleaner
#   - :clean    -- only a literal +false+ skips the cleaning pass
def initialize(text:, **args)
  return unless text

  @doc_type = args[:doc_type]
  @language = args[:language] || 'en'
  @text = text.dup
  # Only an explicit `clean: false` opts out; nil or a missing key still cleans.
  return if args[:clean].eql?(false)

  @text = cleaner_class.new(text: @text, doc_type: @doc_type).clean
end

Instance Attribute Details

#doc_type ⇒ Object (readonly)

Returns the value of attribute doc_type.



30
31
32
# File 'lib/pragmatic_segmenter/segmenter.rb', line 30

# Reader for the document type handed to the constructor.
#
# @return [Object, nil] the :doc_type option as given to #initialize; nil when
#   the option was omitted or when the instance was built with a nil text
def doc_type
  @doc_type
end

#language ⇒ Object (readonly)

Returns the value of attribute language.



30
31
32
# File 'lib/pragmatic_segmenter/segmenter.rb', line 30

# Reader for the language code chosen at construction time.
#
# @return [Object, nil] the :language option given to #initialize, or 'en'
#   when that option was absent or falsy; nil when the instance was built
#   with a nil text
def language
  @language
end

#text ⇒ Object (readonly)

Returns the value of attribute text.



30
31
32
# File 'lib/pragmatic_segmenter/segmenter.rb', line 30

# Reader for the text this segmenter works on.
#
# @return [Object, nil] a duplicate of the constructor's text when built with
#   `clean: false`, otherwise whatever the cleaner's #clean returned; nil when
#   the instance was built with a nil text
def text
  @text
end

Instance Method Details

#segment ⇒ Object



42
43
44
45
# File 'lib/pragmatic_segmenter/segmenter.rb', line 42

# Runs the language-specific processor over the stored text.
#
# @return [Array, Object] an empty array when there is no text, otherwise
#   whatever the processor's #process returns
def segment
  return [] unless text

  processor = process_class.new(text: text, doc_type: doc_type)
  processor.process
end