Class: SentenceIt

Inherits:
Object
  • Object
show all
Defined in:
lib/sentence_it.rb,
ext/sentence_it/sentence_it.c

Constant Summary collapse

# Built-in rule set; update_rules merges caller-supplied overrides on top of it.
#
# Every pattern is kept as a regex *source string*, not a Regexp object.
# NOTE(review): presumably these are compiled and applied by the C extension
# (ext/sentence_it/sentence_it.c) — confirm there.
#
# Keys:
#   break_pattern     - single pattern string (see inline note).
#   candidate_pattern - single pattern string (see inline note).
#   positive_rules    - list of [first, second] pattern pairs.
#   negative_rules    - list of [first, second] pattern pairs.
#
# In every pair the first pattern is anchored at the end ('$') and the second,
# when non-empty, at the start ('^').
# NOTE(review): this suggests the first is tested against the text ending at a
# candidate boundary and the second against the text following it, with
# negative rules vetoing a boundary — verify against the extension source.
DEFAULT_RULES =
{
  break_pattern: "([ \t]*\n+)+[ \t]*", # a run of one or more newlines, allowing spaces/tabs before each group and after the last
  candidate_pattern: "[ \t]+", # one or more spaces or tabs
  positive_rules: [
    ['[.!?]$', '^[0-9A-Z]'], # '.', '!' or '?' / then a digit or capital letter
    [':$', '^[0-9]'], # colon / then a digit
    [':$', '^[A-Z][a-z]'] # colon / then a capital followed by a lowercase letter
  ],
  negative_rules: [
    ['(Mrs|Mmes|Mr|Messrs|Ms|Prof|Dr|Drs|Rev|Hon|Sen|St)\.$', '^[A-Z][a-z]'], # title abbreviation / then a capitalized word
    ['(Sr|Jr)\.$', '^[A-Z][a-z]'], # "Sr." or "Jr." / then a capitalized word
    ['\b[A-Z][a-z]*\.$', '^[0-9A-Z]'], # capital letter plus optional lowercase letters and a period / then a digit or capital
    ['(cf|vs)\.$', ''], # "cf." or "vs." / empty second pattern (presumably "anything" — confirm)
    ['e\.g\.$', ''], # "e.g." / empty second pattern
    ['i\.e\.$', ''], # "i.e." / empty second pattern
    ['(Sec|Chap|Fig|Eq)\.$', '^[0-9A-Z]'] # reference abbreviation / then a digit or capital
  ]
}

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Object

Class Method Details

.update_rules(rules) ⇒ Object



23
24
25
# File 'lib/sentence_it.rb', line 23

# Builds a rule set by layering +rules+ over DEFAULT_RULES.
#
# The merge is shallow: a key present in +rules+ replaces the whole default
# value for that key. DEFAULT_RULES itself is left untouched; a new Hash is
# returned on every call.
#
# @param rules [Hash] overrides keyed like DEFAULT_RULES
# @return [Hash] a fresh hash containing the defaults plus the overrides
def self.update_rules(rules)
  combined = DEFAULT_RULES.dup # work on a copy so the constant is never modified
  combined.update(rules)
  combined
end

Instance Method Details

#annotate ⇒ Object

#segment ⇒ Object