Class: Abstractor::Parser

Inherits:

Object

Object
Abstractor::Parser

show all

Defined in: lib/abstractor/parser.rb

Instance Attribute Summary collapse

#abstractor_text ⇒ Object

Returns the value of attribute abstractor_text.
#sentences ⇒ Object

Returns the value of attribute sentences.

Instance Method Summary collapse

#find_sentence(range) ⇒ Object
#initialize(abstractor_text, options = {}) ⇒ Parser (constructor)

Returns a new instance of Parser.
#match(token) ⇒ Object
#match_position(match) ⇒ Object
#match_sentence(sentence, token) ⇒ Object
#range_all(token) ⇒ Object
#scan(token, options = {}) ⇒ Object

Constructor Details

#initialize(abstractor_text, options = {}) ⇒ `Parser`

Returns a new instance of Parser.

# File 'lib/abstractor/parser.rb', line 6

def initialize(abstractor_text, options = {})
  options = { new_line_is_sentence_break: true }.merge(options)
  @abstractor_text = abstractor_text

  puts options[:new_line_is_sentence_break]
  if options[:new_line_is_sentence_break]
    StanfordCoreNLP.custom_properties['ssplit.newlineIsSentenceBreak'] = 'always'
  else
    StanfordCoreNLP.custom_properties['ssplit.newlineIsSentenceBreak'] = 'two'
  end

  pipeline =  StanfordCoreNLP.load(:tokenize, :ssplit)
  t = StanfordCoreNLP::Annotation.new(@abstractor_text)
  pipeline.annotate(t)
  if @abstractor_text
    @sentences = t.get(:sentences).to_a.map do |s|
      {
        :range => s.get(:character_offset_begin).to_s.to_i..s.get(:character_offset_end).to_s.to_i,
        :begin_position  => s.get(:character_offset_begin).to_s.to_i,
        :end_position => s.get(:character_offset_end).to_s.to_i,
        :sentence => @abstractor_text[s.get(:character_offset_begin).to_s.to_i..s.get(:character_offset_end).to_s.to_i].downcase
      }
    end
  end
end

Instance Attribute Details

#abstractor_text ⇒ `Object`

Returns the value of attribute abstractor_text.



4
5
6

# File 'lib/abstractor/parser.rb', line 4

# Reader for the @abstractor_text instance variable.
#
# @return [Object] the current value of @abstractor_text
def abstractor_text
  @abstractor_text
end

#sentences ⇒ `Object`

Returns the value of attribute sentences.



4
5
6

# File 'lib/abstractor/parser.rb', line 4

# Reader for the @sentences instance variable.
#
# @return [Object] the current value of @sentences
def sentences
  @sentences
end

Instance Method Details

#find_sentence(range) ⇒ `Object`



62
63
64

# File 'lib/abstractor/parser.rb', line 62

# Finds the first entry of @sentences whose :range includes +range+.
#
# @param range [Object] the value tested with `sentence[:range].include?`
#   NOTE(review): whether a Range argument matches depends on the
#   Range#include? implementation active in the host application -- confirm.
# @return [Hash, nil] the matching sentence entry, or nil when nothing
#   matches or when @sentences has not been populated
def find_sentence(range)
  # @sentences may be nil (nothing was parsed); treat that as "no sentences"
  # instead of raising NoMethodError.
  (@sentences || []).find { |sentence| sentence[:range].include?(range) }
end

#match(token) ⇒ `Object`

# File 'lib/abstractor/parser.rb', line 43

# Matches the prepared abstractor text against the pattern that
# prepare_token builds for +token+.
#
# @param token [Object] value handed to prepare_token
# @return [Object, nil] the result of calling `match` on the prepared text,
#   or nil when prepare_token returns nil
def match(token)
  pattern = prepare_token(token)
  return if pattern.nil?

  prepare_abstractor_text.match(pattern)
end

#match_position(match) ⇒ `Object`



53
54
55

# File 'lib/abstractor/parser.rb', line 53

# Returns how many characters precede the matched text.
#
# @param match [#pre_match] a match result (e.g. MatchData)
# @return [Integer] the size of the text before the match
def match_position(match)
  preceding_text = match.pre_match
  preceding_text.length
end

#match_sentence(sentence, token) ⇒ `Object`

# File 'lib/abstractor/parser.rb', line 57

# Matches +sentence+ against the pattern that prepare_token builds for
# +token+.
#
# @param sentence [#match] the sentence text to search
# @param token [Object] value handed to prepare_token
# @return [Object, nil] the result of `sentence.match`, or nil when
#   prepare_token returns nil
def match_sentence(sentence, token)
  regular_expression = prepare_token(token)
  # Reuse the pattern prepared above rather than preparing the token again.
  sentence.match(regular_expression) unless regular_expression.nil?
end

#range_all(token) ⇒ `Object`

# File 'lib/abstractor/parser.rb', line 48

# Calls `range_all` on the prepared abstractor text with the pattern that
# prepare_token builds for +token+.
#
# @param token [Object] value handed to prepare_token
# @return [Object, nil] whatever the prepared text's `range_all` returns
#   (presumably the ranges of every occurrence -- that method is defined
#   elsewhere), or nil when prepare_token returns nil
def range_all(token)
  pattern = prepare_token(token)
  return if pattern.nil?

  prepare_abstractor_text.range_all(pattern)
end

#scan(token, options = {}) ⇒ `Object`

# File 'lib/abstractor/parser.rb', line 32

# Scans the prepared abstractor text for every occurrence of the pattern
# that prepare_token builds for +token+.
#
# @param token [Object] value handed to prepare_token
# @param options [Hash] forwarded to prepare_token; the caller's hash is
#   not modified
# @option options [Boolean] :word_boundary (true) defaulted to true when
#   absent or nil
# @return [Array] the result of `scan` on the prepared text, or an empty
#   array when either the pattern or the prepared text is nil
def scan(token, options = {})
  # Apply the default on a copy so a caller-supplied (possibly frozen or
  # reused) hash is left untouched.
  options = options.merge(word_boundary: true) if options[:word_boundary].nil?
  regular_expression = prepare_token(token, options)
  at = prepare_abstractor_text
  return [] if regular_expression.nil? || at.nil?

  at.scan(regular_expression)
end

Class: Abstractor::Parser

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(abstractor_text, options = {}) ⇒ Parser

Instance Attribute Details

#abstractor_text ⇒ Object

#sentences ⇒ Object

Instance Method Details

#find_sentence(range) ⇒ Object

#match(token) ⇒ Object

#match_position(match) ⇒ Object

#match_sentence(sentence, token) ⇒ Object

#range_all(token) ⇒ Object

#scan(token, options = {}) ⇒ Object

#initialize(abstractor_text, options = {}) ⇒ `Parser`

#abstractor_text ⇒ `Object`

#sentences ⇒ `Object`

#find_sentence(range) ⇒ `Object`

#match(token) ⇒ `Object`

#match_position(match) ⇒ `Object`

#match_sentence(sentence, token) ⇒ `Object`

#range_all(token) ⇒ `Object`

#scan(token, options = {}) ⇒ `Object`