Class: AnyStyle::ParserCore
- Inherits:
-
Object
- Object
- AnyStyle::ParserCore
- Includes:
- StringUtils
- Defined in:
- lib/anystyle/parser.rb
Class Attribute Summary collapse
-
.defaults ⇒ Object
readonly
Returns the value of attribute defaults.
-
.formats ⇒ Object
readonly
Returns the value of attribute formats.
Instance Attribute Summary collapse
-
#features ⇒ Object
readonly
Returns the value of attribute features.
-
#model ⇒ Object
readonly
Returns the value of attribute model.
-
#normalizers ⇒ Object
readonly
Returns the value of attribute normalizers.
-
#options ⇒ Object
readonly
Returns the value of attribute options.
Class Method Summary collapse
-
.instance ⇒ Object
Returns a default parser instance.
- .load(path) ⇒ Object
Instance Method Summary collapse
- #check(input) ⇒ Object
- #expand(dataset) ⇒ Object
-
#initialize(options = {}) ⇒ ParserCore
constructor
A new instance of ParserCore.
- #label(input, **opts) ⇒ Object
- #learn(input) ⇒ Object
- #load_model(file = options[:model]) ⇒ Object
- #normalize(hash, **opts) ⇒ Object
- #prepare(input, **opts) ⇒ Object
- #train(input = options[:training_data], truncate: true) ⇒ Object
Methods included from StringUtils
canonize, count, display_chars, display_width, indent, nnum, page_break?, scrub, strip_html, transliterate
Constructor Details
#initialize(options = {}) ⇒ ParserCore
Returns a new instance of ParserCore.
20 21 22 23 |
# File 'lib/anystyle/parser.rb', line 20 def initialize(options = {}) @options = self.class.defaults.merge(options) load_model end |
Class Attribute Details
.defaults ⇒ Object (readonly)
Returns the value of attribute defaults.
6 7 8 |
# File 'lib/anystyle/parser.rb', line 6 def defaults @defaults end |
.formats ⇒ Object (readonly)
Returns the value of attribute formats.
6 7 8 |
# File 'lib/anystyle/parser.rb', line 6 def formats @formats end |
Instance Attribute Details
#features ⇒ Object (readonly)
Returns the value of attribute features.
18 19 20 |
# File 'lib/anystyle/parser.rb', line 18 def features @features end |
#model ⇒ Object (readonly)
Returns the value of attribute model.
18 19 20 |
# File 'lib/anystyle/parser.rb', line 18 def model @model end |
#normalizers ⇒ Object (readonly)
Returns the value of attribute normalizers.
18 19 20 |
# File 'lib/anystyle/parser.rb', line 18 def normalizers @normalizers end |
#options ⇒ Object (readonly)
Returns the value of attribute options.
18 19 20 |
# File 'lib/anystyle/parser.rb', line 18 def options @options end |
Class Method Details
.instance ⇒ Object
Returns a default parser instance
13 14 15 |
# File 'lib/anystyle/parser.rb', line 13 def instance Thread.current["anystyle_#{name.downcase}"] ||= new end |
.load(path) ⇒ Object
8 9 10 |
# File 'lib/anystyle/parser.rb', line 8 def load(path) new :model => path end |
Instance Method Details
#check(input) ⇒ Object
41 42 43 |
# File 'lib/anystyle/parser.rb', line 41 def check(input) model.check prepare(input, tagged: true) end |
#expand(dataset) ⇒ Object
68 69 70 |
# File 'lib/anystyle/parser.rb', line 68 def expand(dataset) raise NotImplementedError end |
#label(input, **opts) ⇒ Object
37 38 39 |
# File 'lib/anystyle/parser.rb', line 37 def label(input, **opts) model.label prepare(input, **opts) end |
#learn(input) ⇒ Object
53 54 55 |
# File 'lib/anystyle/parser.rb', line 53 def learn(input) train(input, truncate: false) end |
#load_model(file = options[:model]) ⇒ Object
25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/anystyle/parser.rb', line 25 def load_model(file = options[:model]) unless file.nil? @model = Wapiti.load(file) @model.options.update_attributes options else @model = Wapiti::Model.new(options.reject { |k,_| k == :model }) @model.path = options[:model] end self end |
#normalize(hash, **opts) ⇒ Object
57 58 59 60 61 62 63 64 65 66 |
# File 'lib/anystyle/parser.rb', line 57 def normalize(hash, **opts) normalizers.each do |n| begin hash = n.normalize(hash, **opts) unless n.skip? rescue => e warn "Error in #{n.name} normalizer: #{e.message}" end end hash end |
#prepare(input, **opts) ⇒ Object
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/anystyle/parser.rb', line 72 def prepare(input, **opts) case input when Wapiti::Dataset input when Wapiti::Sequence Wapiti::Dataset.new([input]) when String if !input.tainted? && input.length < 1024 && File.exists?(input) Wapiti::Dataset.open(input, opts) else Wapiti::Dataset.parse(input, opts) end else Wapiti::Dataset.parse(input, opts) end end |
#train(input = options[:training_data], truncate: true) ⇒ Object
45 46 47 48 49 50 51 |
# File 'lib/anystyle/parser.rb', line 45 def train(input = options[:training_data], truncate: true) load_model(nil) if truncate unless input.nil? || input.empty? model.train prepare(input, tagged: true) end model end |