Class: Abstractor::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/abstractor/parser.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(abstractor_text, options = {}) ⇒ Parser

Returns a new instance of Parser.



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/abstractor/parser.rb', line 6

# Splits +abstractor_text+ into sentences with the StanfordCoreNLP
# tokenize/ssplit pipeline and stores one hash per sentence in @sentences.
#
# @param abstractor_text [String, nil] the text to split. When it is nil or
#   false the pipeline is still run, but @sentences is never assigned.
# @param options [Hash] recognised key:
#   :new_line_is_sentence_break (defaults to true) - selects 'always' when
#   truthy and 'two' otherwise for the 'ssplit.newlineIsSentenceBreak' property.
#
# NOTE(review): the property is written to StanfordCoreNLP.custom_properties,
# which is not per-instance state - presumably it affects later parsers too;
# confirm.
def initialize(abstractor_text, options = {})
  settings = { new_line_is_sentence_break: true }.merge(options)
  @abstractor_text = abstractor_text

  newline_policy = settings[:new_line_is_sentence_break] ? 'always' : 'two'
  StanfordCoreNLP.custom_properties['ssplit.newlineIsSentenceBreak'] = newline_policy

  pipeline = StanfordCoreNLP.load(:tokenize, :ssplit)
  annotation = StanfordCoreNLP::Annotation.new(@abstractor_text)
  pipeline.annotate(annotation)
  return unless @abstractor_text

  @sentences = annotation.get(:sentences).to_a.map do |sentence|
    # Offsets come back as wrapped values; round-trip through String to get Integers.
    first = sentence.get(:character_offset_begin).to_s.to_i
    last = sentence.get(:character_offset_end).to_s.to_i
    # NOTE(review): +last+ is used as an inclusive bound. If the pipeline
    # reports an exclusive end offset, each slice carries one extra
    # character - confirm before relying on exact sentence text.
    span = first..last

    {
      :range => span,
      :begin_position => first,
      :end_position => last,
      :sentence => @abstractor_text[span].downcase
    }
  end
end

Instance Attribute Details

#abstractor_text ⇒ Object

Returns the value of attribute abstractor_text.



4
5
6
# File 'lib/abstractor/parser.rb', line 4

# Reader for the @abstractor_text instance variable.
#
# @return [Object] the current value of @abstractor_text
def abstractor_text
  @abstractor_text
end

#sentences ⇒ Object

Returns the value of attribute sentences.



4
5
6
# File 'lib/abstractor/parser.rb', line 4

# Reader for the @sentences instance variable.
#
# @return [Object] the current value of @sentences
def sentences
  @sentences
end

Instance Method Details

#find_sentence(range) ⇒ Object



83
84
85
# File 'lib/abstractor/parser.rb', line 83

# Finds the first entry of @sentences whose :range covers +range+.
#
# @param range [Integer, Range] a single character position, or a span of
#   positions that must lie entirely inside one sentence
# @return [Hash, nil] the first matching sentence hash; nil when nothing
#   covers +range+ or when @sentences has not been assigned
def find_sentence(range)
  return nil if @sentences.nil?

  # Range#cover? accepts a plain value as well as another Range (Ruby 2.6+).
  # Unpatched Range#include? compares its argument as a single element, so a
  # Range argument would never match without a core extension.
  @sentences.find { |sentence| sentence[:range].cover?(range) }
end

#match(token) ⇒ Object



63
64
65
66
# File 'lib/abstractor/parser.rb', line 63

# Matches +token+ against the prepared abstractor text.
#
# The token is turned into a pattern by prepare_token and applied to the
# value returned by prepare_abstractor_text.
#
# @param token [Object] passed straight to prepare_token
# @return [Object, nil] the result of calling #match on the prepared text
#   (presumably a MatchData); nil when prepare_token yields nil, when the
#   prepared text is nil, or when there is no match
def match(token)
  regular_expression = prepare_token(token)
  return nil if regular_expression.nil?

  # The prepared text can be nil (scan guards for that too), so bail out
  # instead of raising NoMethodError.
  text = prepare_abstractor_text
  text.match(regular_expression) unless text.nil?
end

#match_position(match) ⇒ Object



74
75
76
# File 'lib/abstractor/parser.rb', line 74

# Reports where a match starts, measured as the size of the text before it.
#
# @param match [#pre_match] a match object, e.g. a MatchData
# @return [Integer] number of characters preceding the match
def match_position(match)
  preceding_text = match.pre_match
  preceding_text.size
end

#match_sentence(sentence, token) ⇒ Object



78
79
80
81
# File 'lib/abstractor/parser.rb', line 78

# Matches +token+ against a single sentence.
#
# @param sentence [String, nil] the sentence text to search
# @param token [Object] passed straight to prepare_token
# @return [MatchData, nil] the first match; nil when prepare_token yields
#   nil, when +sentence+ is nil, or when there is no match
def match_sentence(sentence, token)
  # Build the pattern once and reuse it for both the nil check and the match.
  regular_expression = prepare_token(token)
  return nil if regular_expression.nil? || sentence.nil?

  sentence.match(regular_expression)
end

#range_all(token, options = {}) ⇒ Object



68
69
70
71
72
# File 'lib/abstractor/parser.rb', line 68

# Calls #range_all on the prepared abstractor text with the pattern built
# from +token+.
#
# @param token [Object] passed to prepare_token
# @param options [Hash] forwarded to prepare_token; :word_boundary is
#   treated as true when absent or nil. The caller's hash is not modified.
# @return [Object, nil] whatever the prepared text's #range_all returns
#   (presumably the positions of every match - confirm against that
#   extension); nil when prepare_token yields nil or the prepared text is nil
def range_all(token, options = {})
  # Apply the default on a copy so the caller's hash (possibly frozen or
  # reused) is left untouched.
  options = options.merge(word_boundary: true) if options[:word_boundary].nil?
  regular_expression = prepare_token(token, options)
  return nil if regular_expression.nil?

  text = prepare_abstractor_text
  text.range_all(regular_expression) unless text.nil?
end

#scan(token, options = {}) ⇒ Object



31
32
33
34
35
36
37
38
39
40
# File 'lib/abstractor/parser.rb', line 31

# Scans the prepared abstractor text for every occurrence of +token+.
#
# @param token [Object] passed to prepare_token
# @param options [Hash] forwarded to prepare_token; :word_boundary is
#   treated as true when absent or nil. The caller's hash is not modified.
# @return [Array] the result of String#scan on the prepared text; an empty
#   array when prepare_token yields nil or the prepared text is nil
def scan(token, options = {})
  # Apply the default on a copy so the caller's hash (possibly frozen or
  # reused) is left untouched.
  options = options.merge(word_boundary: true) if options[:word_boundary].nil?
  regular_expression = prepare_token(token, options)
  text = prepare_abstractor_text
  return [] if regular_expression.nil? || text.nil?

  text.scan(regular_expression)
end

#sentence_match_scan(sentence, token, options = {}) ⇒ Object



52
53
54
55
56
57
58
59
60
61
# File 'lib/abstractor/parser.rb', line 52

# Collects a MatchData for every occurrence of +token+ in +sentence+.
#
# @param sentence [String, nil] the sentence text to search
# @param token [Object] passed to prepare_token
# @param options [Hash] forwarded to prepare_token; :word_boundary is
#   treated as true when absent or nil. The caller's hash is not modified.
# @return [Array<MatchData>] one entry per occurrence, in order; an empty
#   array when prepare_token yields nil or +sentence+ is nil
def sentence_match_scan(sentence, token, options = {})
  # Apply the default on a copy so the caller's hash (possibly frozen or
  # reused) is left untouched.
  options = options.merge(word_boundary: true) if options[:word_boundary].nil?
  regular_expression = prepare_token(token, options)
  return [] if regular_expression.nil? || sentence.nil?

  # String#scan only yields the matched text, so read the full MatchData of
  # each hit from Regexp.last_match while enumerating.
  # http://stackoverflow.com/questions/6804557/how-do-i-get-the-match-data-for-all-occurrences-of-a-ruby-regular-expression-in
  sentence.to_enum(:scan, regular_expression).map { Regexp.last_match }
end

#sentence_scan(sentence, token, options = {}) ⇒ Object



42
43
44
45
46
47
48
49
50
# File 'lib/abstractor/parser.rb', line 42

# Scans a single sentence for every occurrence of +token+.
#
# @param sentence [String, nil] the sentence text to search
# @param token [Object] passed to prepare_token
# @param options [Hash] forwarded to prepare_token; :word_boundary is
#   treated as true when absent or nil. The caller's hash is not modified.
# @return [Array] the result of String#scan on +sentence+; an empty array
#   when prepare_token yields nil or +sentence+ is nil
def sentence_scan(sentence, token, options = {})
  # Apply the default on a copy so the caller's hash (possibly frozen or
  # reused) is left untouched.
  options = options.merge(word_boundary: true) if options[:word_boundary].nil?
  regular_expression = prepare_token(token, options)
  return [] if regular_expression.nil? || sentence.nil?

  sentence.scan(regular_expression)
end