Class: PragmaticSegmenter::List

Inherits:
Object
  • Object
show all
Defined in:
lib/pragmatic_segmenter/list.rb

Overview

This class searches for a list within a string and adds newlines before each list item.

Constant Summary collapse

# Lowercase roman numerals for 1 through 20, in ascending order with no
# repeats, so that an entry's array position is its numeric value minus one
# and consecutive numerals are adjacent.
ROMAN_NUMERALS =
%w(i ii iii iv v vi vii viii ix x xi xii xiii xiv xv xvi xvii xviii xix xx)
# The lowercase letters 'a' through 'z' as an array of one-character strings.
LATIN_NUMERALS =
('a'..'z').to_a
# Matches a single lowercase letter that is followed by a period and preceded
# by a line start, the string start, or a whitespace character. Only the
# letter is consumed; the period is checked with a lookahead.
ALPHABETICAL_LIST_WITH_PERIODS =
/(?<=^)[a-z](?=\.)|(?<=\A)[a-z](?=\.)|(?<=\s)[a-z](?=\.)/
# Case-insensitive. Matches one or more letters that are followed by ')' and
# preceded by '(', a line start, the string start, or a whitespace character.
# Only the letters are consumed; the surrounding characters are lookarounds.
ALPHABETICAL_LIST_WITH_PARENS =
/(?<=\()[a-z]+(?=\))|(?<=^)[a-z]+(?=\))|(?<=\A)[a-z]+(?=\))|(?<=\s)[a-z]+(?=\))/i
# NOTE(review): both the pattern and the replacement appear empty here; this
# may be an artifact of how the page was rendered — confirm against
# lib/pragmatic_segmenter/list.rb.
SubstituteListPeriodRule =
Rule.new(//, '')
# NOTE(review): both the pattern and the replacement appear empty here; this
# may be an artifact of how the page was rendered — confirm against
# lib/pragmatic_segmenter/list.rb.
ListMarkerRule =
Rule.new(//, '')
# Pattern: one whitespace character preceded by two non-whitespace characters
# (or a line start) and followed by a non-whitespace character, optional
# whitespace, one or two digits, and '♨'. Paired with "\r"; presumably Rule
# (defined elsewhere) treats the second argument as the replacement.
SpaceBetweenListItemsFirstRule =
Rule.new(/(?<=\S\S|^)\s(?=\S\s*\d{1,2}♨)/, "\r")
# Pattern: one whitespace character preceded by two non-whitespace characters
# (or a line start) and followed directly by one or two digits and '♨'.
# Paired with "\r".
SpaceBetweenListItemsSecondRule =
Rule.new(/(?<=\S\S|^)\s(?=\d{1,2}♨)/, "\r")
# Pattern: one whitespace character preceded by two non-whitespace characters
# (or a line start) and followed directly by one or two digits and '☝'.
# Paired with "\r".
SpaceBetweenListItemsThirdRule =
Rule.new(/(?<=\S\S|^)\s(?=\d{1,2}☝)/, "\r")
# Matches a one- or two-digit list number that is followed by ". " (period and
# whitespace) or ".)" and that appears after whitespace, at a line start, or
# right after a '-' / '⁃' bullet which itself follows whitespace or a line
# start. The trailing period is never consumed. In the first and third
# alternatives the leading whitespace character is part of the match; the
# bullet alternatives use lookbehinds, so only the digits are matched there.
NUMBERED_LIST_REGEX_1 =
/\s\d{1,2}(?=\.\s)|^\d{1,2}(?=\.\s)|\s\d{1,2}(?=\.\))|^\d{1,2}(?=\.\))|(?<=\s\-)\d{1,2}(?=\.\s)|(?<=^\-)\d{1,2}(?=\.\s)|(?<=\s\⁃)\d{1,2}(?=\.\s)|(?<=^\⁃)\d{1,2}(?=\.\s)|(?<=\s\-)\d{1,2}(?=\.\))|(?<=^\-)\d{1,2}(?=\.\))|(?<=\s\⁃)\d{1,2}(?=\.\))|(?<=^\⁃)\d{1,2}(?=\.\))/
# Matches a one- or two-digit list number together with its trailing period,
# when what follows is whitespace or ')' and what precedes is whitespace, a
# line start, or a '-' / '⁃' bullet that itself follows whitespace or a line
# start. The preceding and following characters are lookarounds and are not
# consumed.
NUMBERED_LIST_REGEX_2 =
/(?<=\s)\d{1,2}\.(?=\s)|^\d{1,2}\.(?=\s)|(?<=\s)\d{1,2}\.(?=\))|^\d{1,2}\.(?=\))|(?<=\s\-)\d{1,2}\.(?=\s)|(?<=^\-)\d{1,2}\.(?=\s)|(?<=\s\⁃)\d{1,2}\.(?=\s)|(?<=^\⁃)\d{1,2}\.(?=\s)|(?<=\s\-)\d{1,2}\.(?=\))|(?<=^\-)\d{1,2}\.(?=\))|(?<=\s\⁃)\d{1,2}\.(?=\))|(?<=^\⁃)\d{1,2}\.(?=\))/
# Matches one or two digits that are followed by ')' and a whitespace
# character. Nothing is required before the digits.
NUMBERED_LIST_PARENS_REGEX =
/\d{1,2}(?=\)\s)/
# Case-insensitive. Matches one or more letters followed by ')'. In the first
# alternative the opening '(' is consumed as part of the match; in the others
# the letters must follow a line start, the string start, or whitespace, which
# is checked with a lookbehind.
EXTRACT_ALPHABETICAL_LIST_LETTERS_REGEX =
/\([a-z]+(?=\))|(?<=^)[a-z]+(?=\))|(?<=\A)[a-z]+(?=\))|(?<=\s)[a-z]+(?=\))/i
# Case-insensitive. Matches a single letter plus its trailing period when the
# letter follows a line start, the string start, or a whitespace character.
ALPHABETICAL_LIST_LETTERS_AND_PERIODS_REGEX =
/(?<=^)[a-z]\.|(?<=\A)[a-z]\.|(?<=\s)[a-z]\./i

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text:) ⇒ List

Returns a new instance of List.



45
46
47
# File 'lib/pragmatic_segmenter/list.rb', line 45

# Creates a List for the given input.
#
# @param text the input to scan; it is wrapped with Text.new (defined
#   elsewhere) and stored in @text
def initialize(text:)
  @text = Text.new(text)
end

Instance Attribute Details

#text ⇒ Object (readonly)

Returns the value of attribute text.



44
45
46
# File 'lib/pragmatic_segmenter/list.rb', line 44

# Reader for the @text instance variable.
#
# @return the current value of @text
def text
  @text
end

Instance Method Details

#add_line_break ⇒ Object



49
50
51
52
53
54
# File 'lib/pragmatic_segmenter/list.rb', line 49

# Runs the list-formatting passes in a fixed order: alphabetical lists, roman
# numeral lists, numbered lists with periods, then numbered lists with parens.
# The four helpers are defined outside this excerpt.
#
# @return whatever the final call (format_numbered_list_with_parens) returns
def add_line_break
  format_alphabetical_lists
  format_roman_numeral_lists
  format_numbered_list_with_periods
  format_numbered_list_with_parens
end

#replace_parens ⇒ Object



56
57
58
59
60
61
62
63
64
# File 'lib/pragmatic_segmenter/list.rb', line 56

# Rewrites parenthesized roman-numeral markers such as "(ii)" so that the
# opening paren becomes '&✂&' and the closing paren becomes '&⌬&'. A marker is
# only rewritten when it is followed by a whitespace character and an
# uppercase ASCII letter. The text is modified in place.
#
# @return the (possibly modified) text object
def replace_parens
  ROMAN_NUMERALS.each do |rm|
    marker = /\(#{Regexp.escape(rm)}\)(?=\s[A-Z])/
    # gsub! is a no-op when nothing matches, so no separate pre-check is
    # needed. Inside the block the non-destructive `sub` is used because the
    # bang variants return nil when nothing changes and are unsafe to chain.
    text.gsub!(marker) do |match|
      match.sub('(', '&✂&').sub(')', '&⌬&')
    end
  end
  text
end