Class: PatternRelExt

Inherits:
Object
  • Object
show all
Defined in:
lib/rbbt/ner/patterns.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(patterns, slack = nil, type = nil) ⇒ PatternRelExt

Returns a new instance of PatternRelExt.



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/rbbt/ner/patterns.rb', line 91

# Builds a relation extractor from one or more chunk patterns.
#
# Each pattern string is tokenized, the tokenized patterns are loaded into a
# freshly created token trie (see +new_token_trie+), and the trie's slack is
# configured.
#
# @param patterns [Hash, TSV, Array, String] a Hash/TSV is used as given (each
#   value is iterated with +collect+); an Array is filed under the +:Relation+
#   key; a single String becomes a one-element list under +:Relation+
# @param slack [Object, nil] value assigned to the trie's +slack+ attribute;
#   when nil (or false) a proc that is true for tokens whose +type+ is not 'O'
#   is used instead
# @param type [Object, nil] stored in +@type+ and forwarded to
#   +PatternRelExt.prepare_chunk_patterns+
# @raise [ArgumentError] if +patterns+ is none of the supported kinds
def initialize(patterns, slack = nil, type = nil)
  patterns = case patterns
             when Hash, TSV
               patterns
             when Array
               {:Relation => patterns}
             when String
               {:Relation => [patterns]}
             else
               # Without this branch +patterns+ silently became nil and the
               # failure surfaced later as a NoMethodError on nil.
               raise ArgumentError,
                     "patterns must be a Hash, TSV, Array or String, got #{patterns.class}"
             end

  @type = type

  # Splits on whitespace while keeping bracketed "NP[...]" items as one token.
  separator = /(NP\[[^\]]+\])|\s+/

  tokenized_patterns = {}
  patterns.each do |key, values|
    tokenized_patterns[key] = values.collect { |v| Token.tokenize(v, separator) }
  end

  PatternRelExt.prepare_chunk_patterns(new_token_trie, tokenized_patterns, type)
  token_trie.slack = slack || Proc.new{|t| t.type != 'O'}
end

Instance Attribute Details

#token_trie ⇒ Object

Returns the value of attribute token_trie.



76
77
78
# File 'lib/rbbt/ner/patterns.rb', line 76

# Reader for the +@token_trie+ instance variable.
#
# @return [Object, nil] whatever is currently stored in +@token_trie+
def token_trie
  @token_trie
end

#type ⇒ Object

Returns the value of attribute type.



76
77
78
# File 'lib/rbbt/ner/patterns.rb', line 76

# Reader for the +@type+ instance variable.
#
# @return [Object, nil] whatever is currently stored in +@type+
def type
  @type
end

Class Method Details

.prepare_chunk_patterns(token_trie, patterns, type = nil) ⇒ Object



72
73
74
# File 'lib/rbbt/ner/patterns.rb', line 72

# Loads +patterns+ into +token_trie+.
#
# The patterns are first turned into an index by +TokenTrieNER.process+
# (starting from an empty Hash), that index is rewritten by
# +transform_index+, and the result is merged into the trie.
#
# @param token_trie [#merge] trie receiving the transformed index
# @param patterns [Object] patterns handed to +TokenTrieNER.process+
# @param type [Object, nil] passed through as the second argument of +merge+
# @return [Object] whatever +token_trie.merge+ returns
def self.prepare_chunk_patterns(token_trie, patterns, type = nil)
  raw_index = TokenTrieNER.process({}, patterns)
  chunk_index = transform_index(raw_index)
  token_trie.merge(chunk_index, type)
end

.simple_pattern(sentence, patterns, type = nil) ⇒ Object



11
12
13
14
15
16
17
18
19
20
# File 'lib/rbbt/ner/patterns.rb', line 11

# Runs regular-expression patterns over a sentence through
# +Transformed.with_transform+, using a transform that renders each segment
# as its upcased type.
#
# @param sentence [Object] object responding to +segments+
# @param patterns [Array, Object] one pattern or an Array of patterns; each
#   one is interpolated into a Regexp
# @param type [Object, nil] key under which the regexps are registered in the
#   +RegExpNER+; defaults to "Simple Pattern"
# @return [Object] the value of the +with_transform+ call, whose block yields
#   +RegExpNER#entities+ of the yielded sentence
def self.simple_pattern(sentence, patterns, type = nil)
  pattern_list = patterns.is_a?(Array) ? patterns : [patterns]
  label = type || "Simple Pattern"
  regexps = pattern_list.map { |source| /#{source}/ }
  matcher = RegExpNER.new(label => regexps)

  segments = sentence.segments
  # Segments grouped in a Hash are collapsed into a single flat list.
  segments = segments.values.flatten if segments.is_a?(Hash)

  upcased_type = Proc.new { |segment| segment.type.to_s.upcase }
  Transformed.with_transform(sentence, segments, upcased_type) do |transformed|
    matcher.entities(transformed)
  end
end

.transform_index(index) ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/rbbt/ner/patterns.rb', line 52

# Recursively rewrites the keys of a nested index with +transform_key+.
#
# Entries whose value is a Hash are transformed recursively; when such an
# entry's key becomes a Proc it is stored under the +:PROCS+ sub-hash
# (keyed by that Proc) instead of directly in the result. Entries whose value
# is not a Hash are copied with only their key transformed.
#
# @param index [Hash] nested index to transform
# @return [Hash] a new Hash; +index+ itself is not modified
def self.transform_index(index)
  index.each_with_object({}) do |(key, subtree), result|
    mapped_key = transform_key(key)

    # Leaf entry: keep the value untouched.
    unless Hash === subtree
      result[mapped_key] = subtree
      next
    end

    children = transform_index(subtree)
    if Proc === mapped_key
      (result[:PROCS] ||= {})[mapped_key] = children
    else
      result[mapped_key] = children
    end
  end
end

.transform_key(key) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/rbbt/ner/patterns.rb', line 23

# Turns a pattern key of the form "TYPE[...]" into a predicate Proc over
# chunks; any other key is returned unchanged.
#
# Recognised forms, checked in this order:
# * "TYPE[entity:A,B]" - some NamedEntity among the chunk's segments has a
#   type (as a String) listed in A,B
# * "TYPE[code:A,B]"   - some NamedEntity among the chunk's segments has a
#   code listed in A,B
# * "TYPE[stem:WORD]"  - some whitespace-separated word of the chunk has the
#   same +stem+ as WORD
# * "TYPE[VALUE]"      - some value of +chunk.parts+ equals VALUE
#
# In every form the chunk must also be of type TYPE: TYPE "all" accepts any
# chunk, otherwise +chunk.type+ must equal TYPE or, when it is an Array,
# include it.
#
# @param key [Object] index key; must respond to +=~+
# @return [Proc, Object] a one-argument Proc for bracketed keys, else +key+
def self.transform_key(key)
  # Shared chunk-type test used by every predicate below.
  type_ok = lambda do |chunk, wanted|
    next true if wanted == "all"
    actual = chunk.type
    Array === actual ? actual.include?(wanted) : actual == wanted
  end

  if key =~ /(.*)\[entity:(.*)\]/
    wanted_type, listed = $1, $2
    entity_types = listed.split(",")
    proc do |chunk|
      type_ok.call(chunk, wanted_type) && begin
        segments = chunk.segments
        segments = segments.values.flatten if Hash === segments
        entities = segments.flatten.select { |s| NamedEntity === s }
        (entities.collect { |e| e.type.to_s }.flatten & entity_types).any?
      end
    end
  elsif key =~ /(.*)\[code:(.*)\]/
    wanted_type, listed = $1, $2
    entity_codes = listed.split(",")
    proc do |chunk|
      type_ok.call(chunk, wanted_type) && begin
        segments = chunk.segments
        segments = segments.values.flatten if Hash === segments
        entities = segments.select { |s| NamedEntity === s }
        (entities.collect { |e| e.code }.flatten & entity_codes).any?
      end
    end
  elsif key =~ /(.*)\[stem:(.*)\]/
    wanted_type, target_word = $1, $2
    proc do |chunk|
      type_ok.call(chunk, wanted_type) &&
        chunk.split(/\s+/).any? { |word| word.stem == target_word.stem }
    end
  elsif key =~ /(.*)\[(.*)\]/
    wanted_type, wanted_value = $1, $2
    proc do |chunk|
      type_ok.call(chunk, wanted_type) &&
        chunk.parts.values.any? { |part| part == wanted_value }
    end
  else
    key
  end
end

Instance Method Details

#match_chunks(chunks) ⇒ Object



115
116
117
118
119
# File 'lib/rbbt/ner/patterns.rb', line 115

# Matches +chunks+ against the token trie and tags every match as a
# Relationship.
#
# @param chunks [Object] chunks handed to +token_trie.match+
# @return [Object] the collection returned by +token_trie.match+, each of
#   whose elements has been extended with the +Relationship+ module
def match_chunks(chunks)
  matches = token_trie.match(chunks)
  matches.each { |found| found.extend(Relationship) }
end

#match_sentences(sentences) ⇒ Object



121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/rbbt/ner/patterns.rb', line 121

# Chunks every sentence, attaches to each chunk the sentence segments that
# fall in the chunk's range, and matches the chunks of each sentence.
#
# @param sentences [Array] sentences, each responding to +segments+
# @return [Array] one +match_chunks+ result per sentence, in input order
def match_sentences(sentences)
  chunked = NLP.gdep_chunk_sentences(sentences)

  sentences.zip(chunked).map do |sentence, chunks|
    segment_index = Segment.index(sentence.segments)
    chunks.each { |chunk| Segmented.setup(chunk, segment_index[chunk.range]) }
    match_chunks(chunks)
  end
end

#new_token_trie ⇒ Object



77
78
79
# File 'lib/rbbt/ner/patterns.rb', line 77

# Replaces +@token_trie+ with a new +TokenTrieNER+ built from an empty Hash.
#
# @return [TokenTrieNER] the newly created trie
def new_token_trie
  fresh_trie = TokenTrieNER.new({})
  @token_trie = fresh_trie
end

#slack(slack) ⇒ Object



86
87
88
# File 'lib/rbbt/ner/patterns.rb', line 86

# Sets the slack of the trie currently held in +@token_trie+.
#
# @param slack [Object] value assigned to the trie's +slack+ attribute
# @return [Object] the +slack+ argument
def slack(slack)
  trie = @token_trie
  trie.slack = slack
end