Class: Abstractor::Parser
- Inherits: Object
- Inheritance chain: Object → Abstractor::Parser
- Defined in:
- lib/abstractor/parser.rb
Instance Attribute Summary collapse
- #abstractor_text ⇒ Object: Returns the value of attribute abstractor_text.
- #sentences ⇒ Object: Returns the value of attribute sentences.
Instance Method Summary collapse
- #find_sentence(range) ⇒ Object
- #initialize(abstractor_text, options = {}) ⇒ Parser (constructor): Returns a new instance of Parser.
- #match(token) ⇒ Object
- #match_position(match) ⇒ Object
- #match_sentence(sentence, token) ⇒ Object
- #range_all(token) ⇒ Object
- #scan(token, options = {}) ⇒ Object
Constructor Details
#initialize(abstractor_text, options = {}) ⇒ Parser
Returns a new instance of Parser.
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/abstractor/parser.rb', line 6
#
# Stores the text and splits it into sentences using the StanfordCoreNLP
# :tokenize and :ssplit annotators.
#
# @param abstractor_text [String, nil] text to parse; when nil, @sentences
#   is left unset.
# @param options [Hash] parser options (not mutated).
# @option options [Boolean] :new_line_is_sentence_break (true) when truthy,
#   'ssplit.newlineIsSentenceBreak' is set to 'always', otherwise to 'two'
#   (see the CoreNLP ssplit documentation for the meaning of each value).
# @return [Parser] a new instance of Parser.
def initialize(abstractor_text, options = {})
  options = { new_line_is_sentence_break: true }.merge(options)
  @abstractor_text = abstractor_text

  # This property is set on the StanfordCoreNLP module itself, not on this
  # instance, so it stays in effect after the constructor returns.
  StanfordCoreNLP.custom_properties['ssplit.newlineIsSentenceBreak'] =
    options[:new_line_is_sentence_break] ? 'always' : 'two'

  pipeline = StanfordCoreNLP.load(:tokenize, :ssplit)
  annotation = StanfordCoreNLP::Annotation.new(@abstractor_text)
  pipeline.annotate(annotation)
  return unless @abstractor_text

  @sentences = annotation.get(:sentences).to_a.map do |sentence|
    # Read each offset once; the values are converted through to_s.to_i
    # exactly as before.
    first = sentence.get(:character_offset_begin).to_s.to_i
    last = sentence.get(:character_offset_end).to_s.to_i

    # NOTE(review): the range includes character_offset_end itself; if
    # CoreNLP reports an exclusive end offset, every sentence carries one
    # extra trailing character. Confirm before changing, since
    # find_sentence depends on these ranges.
    {
      :range => first..last,
      :begin_position => first,
      :end_position => last,
      :sentence => @abstractor_text[first..last].downcase
    }
  end
end
Instance Attribute Details
#abstractor_text ⇒ Object
Returns the value of attribute abstractor_text.
4 5 6 |
# File 'lib/abstractor/parser.rb', line 4
#
# Returns the value of attribute abstractor_text.
#
# @return [Object] the current value of @abstractor_text (nil when unset).
def abstractor_text
  @abstractor_text
end
#sentences ⇒ Object
Returns the value of attribute sentences.
4 5 6 |
# File 'lib/abstractor/parser.rb', line 4
#
# Returns the value of attribute sentences.
#
# @return [Object] the current value of @sentences (nil when unset).
def sentences
  @sentences
end
Instance Method Details
#find_sentence(range) ⇒ Object
62 63 64 |
# File 'lib/abstractor/parser.rb', line 62
#
# Finds the first stored sentence whose :range includes +range+.
#
# NOTE(review): the test is Range#include?. Passing a Range (as the
# parameter name suggests) only succeeds where Range#include? accepts
# ranges, e.g. via an ActiveSupport core extension. Confirm against callers.
#
# @param range [Object] value tested against each sentence's :range.
# @return [Hash, nil] the matching sentence hash, or nil when nothing
#   matches or no sentences have been built.
def find_sentence(range)
  # Array() turns a nil @sentences into an empty list instead of raising.
  Array(@sentences).find { |sentence| sentence[:range].include?(range) }
end
#match(token) ⇒ Object
43 44 45 46 |
# File 'lib/abstractor/parser.rb', line 43
#
# Matches +token+ against the prepared abstractor text.
#
# The token is converted by prepare_token and the text comes from
# prepare_abstractor_text; both helpers are defined outside this snippet.
#
# @param token [Object] token handed to prepare_token.
# @return [MatchData, nil] result of calling #match on the prepared text;
#   nil when prepare_token returns nil or when the prepared text is nil
#   (the same nil tolerance that #scan has).
def match(token)
  regular_expression = prepare_token(token)
  return if regular_expression.nil?

  text = prepare_abstractor_text
  text.match(regular_expression) unless text.nil?
end
#match_position(match) ⇒ Object
53 54 55 |
# File 'lib/abstractor/parser.rb', line 53
#
# Returns where a match starts, measured as the length of the text that
# precedes it.
#
# @param match [#pre_match] a match object such as MatchData.
# @return [Integer] size of match.pre_match.
def match_position(match)
  match.pre_match.size
end
#match_sentence(sentence, token) ⇒ Object
57 58 59 60 |
# File 'lib/abstractor/parser.rb', line 57
#
# Matches +token+ against a single sentence.
#
# The token is converted by prepare_token (defined outside this snippet).
# It is converted once and the result reused for both the nil check and
# the match.
#
# @param sentence [#match] sentence text to search.
# @param token [Object] token handed to prepare_token.
# @return [MatchData, nil] result of sentence.match, or nil when
#   prepare_token returns nil.
def match_sentence(sentence, token)
  regular_expression = prepare_token(token)
  sentence.match(regular_expression) unless regular_expression.nil?
end
#range_all(token) ⇒ Object
48 49 50 51 |
# File 'lib/abstractor/parser.rb', line 48
#
# Delegates to #range_all on the prepared abstractor text, using the
# expression produced by prepare_token.
#
# NOTE(review): range_all is not a core String method, so it is presumably
# supplied by a project-level extension. prepare_token and
# prepare_abstractor_text are also defined outside this snippet.
#
# @param token [Object] token handed to prepare_token.
# @return [Object, nil] whatever the text's #range_all returns; nil when
#   prepare_token returns nil or when the prepared text is nil (the same
#   nil tolerance that #scan has).
def range_all(token)
  regular_expression = prepare_token(token)
  return if regular_expression.nil?

  text = prepare_abstractor_text
  text.range_all(regular_expression) unless text.nil?
end
#scan(token, options = {}) ⇒ Object
32 33 34 35 36 37 38 39 40 41 |
# File 'lib/abstractor/parser.rb', line 32
#
# Scans the prepared abstractor text for every occurrence of +token+.
#
# prepare_token and prepare_abstractor_text are defined outside this
# snippet.
#
# @param token [Object] token handed to prepare_token.
# @param options [Hash] options forwarded to prepare_token (not mutated).
# @option options [Boolean] :word_boundary (true) forced to true when
#   missing or nil.
# @return [Array] result of calling #scan on the prepared text, or [] when
#   either the prepared token or the prepared text is nil.
def scan(token, options = {})
  # Work on a merged copy so the caller's hash is not modified.
  options = options.merge(:word_boundary => true) if options[:word_boundary].nil?

  regular_expression = prepare_token(token, options)
  text = prepare_abstractor_text
  return [] if regular_expression.nil? || text.nil?

  text.scan(regular_expression)
end