Class: Abstractor::Parser

Inherits: Object

Inheritance chain: Object → Abstractor::Parser

show all

Defined in: lib/abstractor/parser.rb

Instance Attribute Summary collapse

#abstractor_text ⇒ Object

Returns the value of attribute abstractor_text.
#sentences ⇒ Object

Returns the value of attribute sentences.

Instance Method Summary collapse

Constructor Details

#initialize(abstractor_text, options = {}) ⇒ `Parser`

Returns a new instance of Parser.

# File 'lib/abstractor/parser.rb', line 6

# Builds a parser around +abstractor_text+ and splits the text into sentences
# using the StanfordCoreNLP :tokenize and :ssplit annotators.
#
# Side effect: sets the 'ssplit.newlineIsSentenceBreak' entry of
# StanfordCoreNLP.custom_properties on every call.
#
# @param abstractor_text [String, nil] text to parse; when nil, @sentences is
#   left unassigned
# @param options [Hash]
# @option options [Boolean] :new_line_is_sentence_break (true) when truthy the
#   splitter property is set to 'always', otherwise to 'two'
#   (see the CoreNLP ssplit documentation for the exact meaning of each value)
# @return [Parser]
def initialize(abstractor_text, options = {})
  options = { new_line_is_sentence_break: true }.merge(options)
  @abstractor_text = abstractor_text

  StanfordCoreNLP.custom_properties['ssplit.newlineIsSentenceBreak'] =
    options[:new_line_is_sentence_break] ? 'always' : 'two'

  pipeline = StanfordCoreNLP.load(:tokenize, :ssplit)
  annotation = StanfordCoreNLP::Annotation.new(@abstractor_text)
  pipeline.annotate(annotation)
  return unless @abstractor_text

  @sentences = annotation.get(:sentences).to_a.map do |s|
    # Read each offset once; the values come back through the CoreNLP bridge
    # and are normalised to Ruby Integers via to_s.to_i.
    begin_position = s.get(:character_offset_begin).to_s.to_i
    end_position = s.get(:character_offset_end).to_s.to_i

    # NOTE(review): CoreNLP end offsets are presumably exclusive (one past the
    # last character), so this inclusive range may take in one extra
    # character - confirm before changing, callers may rely on current spans.
    span = begin_position..end_position

    {
      :range => span,
      :begin_position => begin_position,
      :end_position => end_position,
      :sentence => @abstractor_text[span].downcase
    }
  end
end

Instance Attribute Details

#abstractor_text ⇒ `Object`

Returns the value of attribute abstractor_text.



4
5
6

# File 'lib/abstractor/parser.rb', line 4

# Reader for the text this parser was constructed with.
#
# @return [Object] the current value of @abstractor_text (assigned from the
#   constructor's first argument)
def abstractor_text
  @abstractor_text
end

#sentences ⇒ `Object`

Returns the value of attribute sentences.



4
5
6

# File 'lib/abstractor/parser.rb', line 4

# Reader for the sentence list built by the constructor.
#
# @return [Array<Hash>, nil] one hash per sentence with the keys :range,
#   :begin_position, :end_position and :sentence (the downcased sentence
#   text); nil when the constructor was given no text
def sentences
  @sentences
end

Instance Method Details

#find_sentence(range) ⇒ `Object`



83
84
85

# File 'lib/abstractor/parser.rb', line 83

# Finds the first parsed sentence whose character span contains +range+.
#
# Range#cover? is used instead of Range#include? because plain Ruby's
# include? treats a Range argument as a single element (and so never matches);
# range-in-range support for include? only exists when an ActiveSupport
# extension is loaded. cover? handles both Range and Integer arguments.
#
# @param range [Range, Integer] character span (or single position) to locate
# @return [Hash, nil] the matching sentence hash, or nil when no single
#   sentence contains +range+ or when no sentences were parsed
def find_sentence(range)
  # @sentences is left nil by the constructor when the text is nil.
  (@sentences || []).find { |sentence| sentence[:range].cover?(range) }
end

#match(token) ⇒ `Object`

# File 'lib/abstractor/parser.rb', line 63

# Matches +token+ against the prepared abstractor text.
#
# The token is converted to a regular expression by prepare_token and the text
# is obtained from prepare_abstractor_text.
#
# @param token [Object] value handed to prepare_token
# @return [MatchData, nil] the first match; nil when there is no match, when
#   prepare_token yields nil, or when the prepared text is nil
def match(token)
  regular_expression = prepare_token(token)
  return nil if regular_expression.nil?

  # Same nil-text guard that #scan applies, so a parser without text returns
  # nil here instead of raising NoMethodError.
  text = prepare_abstractor_text
  return nil if text.nil?

  text.match(regular_expression)
end

#match_position(match) ⇒ `Object`



74
75
76

# File 'lib/abstractor/parser.rb', line 74

# Reports where a match starts, measured as the length of the text that
# precedes it.
#
# @param match [MatchData] a successful match
# @return [Integer] number of characters before the matched portion
def match_position(match)
  leading_text = match.pre_match
  leading_text.length
end

#match_sentence(sentence, token) ⇒ `Object`

# File 'lib/abstractor/parser.rb', line 78

# Matches +token+ against a single sentence.
#
# The token is converted to a regular expression by prepare_token exactly
# once; the result is used both for the nil check and for the match.
#
# @param sentence [String, nil] sentence text to search
# @param token [Object] value handed to prepare_token
# @return [MatchData, nil] the first match; nil when there is no match, when
#   prepare_token yields nil, or when +sentence+ is nil
def match_sentence(sentence, token)
  regular_expression = prepare_token(token)
  return nil if regular_expression.nil? || sentence.nil?

  sentence.match(regular_expression)
end

#range_all(token, options = {}) ⇒ `Object`

# File 'lib/abstractor/parser.rb', line 68

# Calls #range_all on the prepared abstractor text with the regular expression
# built from +token+.
#
# @param token [Object] value handed to prepare_token
# @param options [Hash] options forwarded to prepare_token; the caller's hash
#   is never modified
# @option options [Boolean] :word_boundary (true) defaulted to true when
#   missing or nil
# @return [Object, nil] whatever the text's #range_all returns (presumably the
#   ranges of every occurrence - confirm against that helper); nil when
#   prepare_token yields nil or the prepared text is nil
def range_all(token, options = {})
  # Merge into a copy so a caller-owned (possibly frozen) hash is left intact.
  options = options.merge(:word_boundary => true) if options[:word_boundary].nil?
  regular_expression = prepare_token(token, options)
  return nil if regular_expression.nil?

  # Same nil-text guard that #scan applies.
  text = prepare_abstractor_text
  return nil if text.nil?

  text.range_all(regular_expression)
end

#scan(token, options = {}) ⇒ `Object`

# File 'lib/abstractor/parser.rb', line 31

# Scans the prepared abstractor text for every occurrence of +token+.
#
# @param token [Object] value handed to prepare_token
# @param options [Hash] options forwarded to prepare_token; the caller's hash
#   is never modified
# @option options [Boolean] :word_boundary (true) defaulted to true when
#   missing or nil
# @return [Array] result of String#scan on the prepared text; an empty array
#   when prepare_token yields nil or the prepared text is nil
def scan(token, options = {})
  # Merge into a copy so a caller-owned (possibly frozen) hash is left intact.
  options = options.merge(:word_boundary => true) if options[:word_boundary].nil?
  regular_expression = prepare_token(token, options)
  text = prepare_abstractor_text
  return [] if regular_expression.nil? || text.nil?

  text.scan(regular_expression)
end

#sentence_match_scan(sentence, token, options = {}) ⇒ `Object`

# File 'lib/abstractor/parser.rb', line 52

# Collects a MatchData object for every occurrence of +token+ in +sentence+.
#
# @param sentence [String, nil] sentence text to search
# @param token [Object] value handed to prepare_token
# @param options [Hash] options forwarded to prepare_token; the caller's hash
#   is never modified
# @option options [Boolean] :word_boundary (true) defaulted to true when
#   missing or nil
# @return [Array<MatchData>] one entry per occurrence, in order; an empty
#   array when prepare_token yields nil or +sentence+ is nil
def sentence_match_scan(sentence, token, options = {})
  # Merge into a copy so a caller-owned (possibly frozen) hash is left intact.
  options = options.merge(:word_boundary => true) if options[:word_boundary].nil?
  regular_expression = prepare_token(token, options)
  return [] if regular_expression.nil? || sentence.nil?

  # String#scan only yields the matched text; enumerating it and reading
  # Regexp.last_match on each step recovers the full MatchData.
  # http://stackoverflow.com/questions/6804557/how-do-i-get-the-match-data-for-all-occurrences-of-a-ruby-regular-expression-in
  sentence.to_enum(:scan, regular_expression).map { Regexp.last_match }
end

#sentence_scan(sentence, token, options = {}) ⇒ `Object`

# File 'lib/abstractor/parser.rb', line 42

# Scans a single sentence for every occurrence of +token+.
#
# @param sentence [String, nil] sentence text to search
# @param token [Object] value handed to prepare_token
# @param options [Hash] options forwarded to prepare_token; the caller's hash
#   is never modified
# @option options [Boolean] :word_boundary (true) defaulted to true when
#   missing or nil
# @return [Array] result of String#scan on +sentence+; an empty array when
#   prepare_token yields nil or +sentence+ is nil
def sentence_scan(sentence, token, options = {})
  # Merge into a copy so a caller-owned (possibly frozen) hash is left intact.
  options = options.merge(:word_boundary => true) if options[:word_boundary].nil?
  regular_expression = prepare_token(token, options)
  return [] if regular_expression.nil? || sentence.nil?

  sentence.scan(regular_expression)
end

Class: Abstractor::Parser

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(abstractor_text, options = {}) ⇒ Parser

Instance Attribute Details

#abstractor_text ⇒ Object

#sentences ⇒ Object

Instance Method Details

#find_sentence(range) ⇒ Object

#match(token) ⇒ Object

#match_position(match) ⇒ Object

#match_sentence(sentence, token) ⇒ Object

#range_all(token, options = {}) ⇒ Object

#scan(token, options = {}) ⇒ Object

#sentence_match_scan(sentence, token, options = {}) ⇒ Object

#sentence_scan(sentence, token, options = {}) ⇒ Object

#initialize(abstractor_text, options = {}) ⇒ `Parser`

#abstractor_text ⇒ `Object`

#sentences ⇒ `Object`

#find_sentence(range) ⇒ `Object`

#match(token) ⇒ `Object`

#match_position(match) ⇒ `Object`

#match_sentence(sentence, token) ⇒ `Object`

#range_all(token, options = {}) ⇒ `Object`

#scan(token, options = {}) ⇒ `Object`

#sentence_match_scan(sentence, token, options = {}) ⇒ `Object`

#sentence_scan(sentence, token, options = {}) ⇒ `Object`