Class: Brill::Tagger
- Inherits:
-
Object
- Object
- Brill::Tagger
- Defined in:
- lib/brill/tagger.rb
Instance Method Summary collapse
-
#initialize(lexicon, lexical_rules, contextual_rules) ⇒ Tagger
constructor
A new instance of Tagger.
-
#tag(text) ⇒ Object
Tags a body of text and returns an array like [[token, tag], …, [token, tag]].
Constructor Details
#initialize(lexicon, lexical_rules, contextual_rules) ⇒ Tagger
Returns a new instance of Tagger.
5 6 7 8 9 10 |
# File 'lib/brill/tagger.rb', line 5

# Creates the underlying ::Tagger::BrillTagger engine, stores it in @tagger,
# and then loads the lexicon, the lexical rules and the contextual rules
# into it, in that order.
#
# @param lexicon          handed unchanged to Brill::Tagger.load_lexicon
# @param lexical_rules    handed unchanged to Brill::Tagger.load_lexical_rules
# @param contextual_rules handed unchanged to Brill::Tagger.load_contextual_rules
# NOTE(review): the expected form of the three arguments (paths vs. data) is
# decided by the load_* helpers, which are not visible here -- confirm there.
def initialize(lexicon, lexical_rules, contextual_rules)
  engine = @tagger = ::Tagger::BrillTagger.new

  Brill::Tagger.load_lexicon(engine, lexicon)
  Brill::Tagger.load_lexical_rules(engine, lexical_rules)
  Brill::Tagger.load_contextual_rules(engine, contextual_rules)
end
Instance Method Details
#tag(text) ⇒ Object
Tags a body of text and returns an array like [[token, tag], …, [token, tag]].
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# File 'lib/brill/tagger.rb', line 15

# Tags a body of text.
#
# The text is tokenized with Brill::Tagger.tokenize, given initial tags by
# Brill::Tagger.tag_start, and then refined by the engine's lexical rules,
# default_tag_finish and contextual rules, in that order.
#
# @param text the body of text to tag; handed to Brill::Tagger.tokenize
# @return [Array<Array>] pairs like [[token, tag], ..., [token, tag]], one
#   per token, in token order
def tag(text)
  tokens = Brill::Tagger.tokenize(text)
  tags = Brill::Tagger.tag_start(tokens)

  @tagger.apply_lexical_rules(tokens, tags, [], 0)
  @tagger.default_tag_finish(tokens, tags)

  # Brill uses these fake "STAART" tags to delimit the start & end of
  # sentence. Both arrays get two of them on each side, and are padded in
  # place so the engine sees the same array objects it worked on above.
  [tokens, tags].each do |list|
    list.unshift("STAART", "STAART")
    list.push("STAART", "STAART")
  end

  @tagger.apply_contextual_rules(tokens, tags, 1)

  # Drop the sentinels again so only the real tokens/tags are returned.
  [tokens, tags].each do |list|
    list.shift(2)
    list.pop(2)
  end

  # Pair each token with the tag at the same index.
  tokens.zip(tags)
end