Class: PragmaticSegmenter::Segmenter

Inherits:
Object
  • Object
show all
Includes:
Languages
Defined in:
lib/pragmatic_segmenter/segmenter.rb

Overview

This class segments a text into an array of sentences.

Constant Summary

Constants included from Languages

Languages::LANGUAGE_CODES

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Languages

#cleaner_class, #language_module, #process_class

Constructor Details

#initialize(text:, language: nil, doc_type: nil, clean: true) ⇒ Segmenter

Returns a new instance of Segmenter.



10
11
12
13
14
15
16
17
18
19
20
# File 'lib/pragmatic_segmenter/segmenter.rb', line 10

# Builds a segmenter for the given text.
#
# When +text+ is nil or false the constructor returns immediately and no
# instance variables are assigned, so every reader returns nil.
#
# @param text [String, nil] the text to store for segmentation
# @param language [Object, nil] language identifier; 'en' is used when nil/false
# @param doc_type [Object, nil] stored as-is and passed on to the cleaner
# @param clean [Boolean] when truthy, the text is run through
#   cleaner_class's #clean before being stored; otherwise it is stored untouched
def initialize(text:, language: nil, doc_type: nil, clean: true)
  return unless text

  # @language is assigned before cleaner_class is called, as in the original ordering.
  @language = language || 'en'
  @doc_type = doc_type
  @text = clean ? cleaner_class.new(text: text, doc_type: @doc_type).clean : text
end

Instance Attribute Details

#doc_type ⇒ Object (readonly)

Returns the value of attribute doc_type.



8
9
10
# File 'lib/pragmatic_segmenter/segmenter.rb', line 8

# Reader for the @doc_type instance variable.
#
# @return [Object, nil] the current value of @doc_type; nil if it was never assigned
def doc_type
  @doc_type
end

#language ⇒ Object (readonly)

Returns the value of attribute language.



8
9
10
# File 'lib/pragmatic_segmenter/segmenter.rb', line 8

# Reader for the @language instance variable.
#
# @return [Object, nil] the current value of @language; nil if it was never assigned
def language
  @language
end

#text ⇒ Object (readonly)

Returns the value of attribute text.



8
9
10
# File 'lib/pragmatic_segmenter/segmenter.rb', line 8

# Reader for the @text instance variable.
#
# @return [Object, nil] the current value of @text; nil if it was never assigned
def text
  @text
end

Instance Method Details

#segment ⇒ Object



22
23
24
25
# File 'lib/pragmatic_segmenter/segmenter.rb', line 22

# Segments the stored text by delegating to the processor class.
#
# @return [Array] an empty array when @text is nil/false; otherwise the
#   result of calling #process on a new process_class instance built from
#   @text and language_module
def segment
  if @text
    processor = process_class.new(text: @text, language: language_module)
    processor.process
  else
    []
  end
end