Class: Abstractor::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/abstractor/parser.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(abstractor_text, options = {}) ⇒ Parser

Returns a new instance of Parser.



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/abstractor/parser.rb', line 6

def initialize(abstractor_text, options = {})
  options = { new_line_is_sentence_break: true }.merge(options)
  @abstractor_text = abstractor_text

  puts options[:new_line_is_sentence_break]
  if options[:new_line_is_sentence_break]
    StanfordCoreNLP.custom_properties['ssplit.newlineIsSentenceBreak'] = 'always'
  else
    StanfordCoreNLP.custom_properties['ssplit.newlineIsSentenceBreak'] = 'two'
  end

  pipeline =  StanfordCoreNLP.load(:tokenize, :ssplit)
  t = StanfordCoreNLP::Annotation.new(@abstractor_text)
  pipeline.annotate(t)
  if @abstractor_text
    @sentences = t.get(:sentences).to_a.map do |s|
      {
        :range => s.get(:character_offset_begin).to_s.to_i..s.get(:character_offset_end).to_s.to_i,
        :begin_position  => s.get(:character_offset_begin).to_s.to_i,
        :end_position => s.get(:character_offset_end).to_s.to_i,
        :sentence => @abstractor_text[s.get(:character_offset_begin).to_s.to_i..s.get(:character_offset_end).to_s.to_i].downcase
      }
    end
  end
end

Instance Attribute Details

#abstractor_text ⇒ Object

Returns the value of attribute abstractor_text.



4
5
6
# File 'lib/abstractor/parser.rb', line 4

# Reader for the text handed to the constructor; returns @abstractor_text
# exactly as stored (it may be nil).
def abstractor_text
  @abstractor_text
end

#sentences ⇒ Object

Returns the value of attribute sentences.



4
5
6
# File 'lib/abstractor/parser.rb', line 4

# Reader for the sentence list built by the constructor. Each entry is a Hash
# with the keys :range, :begin_position, :end_position and :sentence (the
# downcased slice of the text). Reads nil when the parser was built without
# text, because the constructor only assigns @sentences when text is present.
def sentences
  @sentences
end

Instance Method Details

#find_sentence(range) ⇒ Object



62
63
64
# File 'lib/abstractor/parser.rb', line 62

# Finds the first parsed sentence whose :range contains +range+.
#
# The original relied on Range#include? accepting a Range argument, which is
# only true when ActiveSupport's legacy core extension is loaded; in plain Ruby
# that call never matches. The containment test is therefore spelled out here,
# using the same rule as that extension: the start must lie inside the sentence
# range and the end must not run past it.
#
# @param range [Range, Integer] character range (or single position) to locate
# @return [Hash, nil] the matching sentence hash, or nil when nothing contains
#   +range+ or no sentences were parsed
def find_sentence(range)
  # @sentences is nil when the parser was built without text.
  (@sentences || []).detect do |sentence|
    span = sentence[:range]

    if range.is_a?(Range)
      # An exclusive span cannot contain an inclusive range ending on its bound.
      strict_end = span.exclude_end? && !range.exclude_end?
      end_fits = strict_end ? range.last < span.last : range.last <= span.last
      span.include?(range.first) && end_fits
    else
      span.include?(range)
    end
  end
end

#match(token) ⇒ Object



43
44
45
46
# File 'lib/abstractor/parser.rb', line 43

# Matches +token+ against the prepared abstractor text.
#
# @param token the token passed to prepare_token to build the pattern
# @return the result of calling match on the prepared text, or nil when either
#   the pattern or the prepared text is nil
def match(token)
  regular_expression = prepare_token(token)
  text = prepare_abstractor_text

  # Same guard #scan applies: a missing pattern or missing text means there is
  # nothing to match, rather than a NoMethodError on nil.
  return nil if regular_expression.nil? || text.nil?

  text.match(regular_expression)
end

#match_position(match) ⇒ Object



53
54
55
# File 'lib/abstractor/parser.rb', line 53

# Position at which +match+ starts, measured as the number of characters that
# precede it in the matched string.
#
# @param match [MatchData] (or any object responding to pre_match)
# @return [Integer]
def match_position(match)
  preceding_text = match.pre_match
  preceding_text.length
end

#match_sentence(sentence, token) ⇒ Object



57
58
59
60
# File 'lib/abstractor/parser.rb', line 57

# Matches +token+ against a single +sentence+.
#
# @param sentence [String] the sentence text to search (anything responding to match)
# @param token the token passed to prepare_token to build the pattern
# @return the result of sentence.match, or nil when prepare_token yields nil
def match_sentence(sentence, token)
  regular_expression = prepare_token(token)
  # Reuse the pattern built above instead of preparing the token a second time.
  sentence.match(regular_expression) unless regular_expression.nil?
end

#range_all(token) ⇒ Object



48
49
50
51
# File 'lib/abstractor/parser.rb', line 48

# Delegates to range_all on the prepared abstractor text with the pattern built
# from +token+.
#
# @param token the token passed to prepare_token to build the pattern
# @return whatever range_all returns on the prepared text, or nil when either
#   the pattern or the prepared text is nil
def range_all(token)
  regular_expression = prepare_token(token)
  text = prepare_abstractor_text

  # Same guard #scan applies for missing text; nil is kept as the "nothing to
  # search" value this method already returned for a nil pattern.
  return nil if regular_expression.nil? || text.nil?

  text.range_all(regular_expression)
end

#scan(token, options = {}) ⇒ Object



32
33
34
35
36
37
38
39
40
41
# File 'lib/abstractor/parser.rb', line 32

# Scans the prepared abstractor text for every occurrence of +token+.
#
# @param token the token passed to prepare_token to build the pattern
# @param options [Hash] forwarded to prepare_token; :word_boundary is treated
#   as true when it is missing or nil. The caller's hash is never modified.
# @return [Array] the result of scan on the prepared text, or [] when either the
#   pattern or the prepared text is nil
def scan(token, options = {})
  # Apply the default on a copy so a caller-supplied (possibly frozen or
  # reused) hash is left untouched.
  options = options.merge(:word_boundary => true) if options[:word_boundary].nil?

  regular_expression = prepare_token(token, options)
  text = prepare_abstractor_text
  return [] if regular_expression.nil? || text.nil?

  text.scan(regular_expression)
end