Class: Natural

Inherits:
Object
  • Object
show all
Defined in:
lib/natural.rb,
lib/natural/fragment.rb

Defined Under Namespace

Classes: Alternative, Expansion, Fragment, Spelling, Synonym, Unused, Word

Constant Summary collapse

# ANSI SGR escape sequences: green, red and yellow foreground colours, plus
# the reset code that clears any active styling.
GREEN =
"\e[32m"
RED =
"\e[31m"
YELLOW =
"\e[33m"
CLEAR =
"\e[0m"
# The two named matching strategies. #parse special-cases :first_match and
# sends every other value down its else branch.
MATCHING_OPTIONS =
[:most_points, :first_match]
# Fallback used by #parse when options[:spellings] is not supplied.
# NOTE(review): looks like canonical word => known misspellings — confirm.
DEFAULT_SPELLINGS =
{'week' => ['wek', 'weeek'], 'begin' => ['beginn', 'beegin']}
# Fallback used by #parse when options[:synonyms] is not supplied.
# NOTE(review): the '1'/'2' keys presumably just label a group of
# interchangeable words — confirm against the Synonym class.
DEFAULT_SYNONYMS =
{'1' => ['start', 'begin', 'commence'], '2' => ['stop', 'end', 'finish', 'conclude']}
# Fallback used by #parse when options[:expansions] is not supplied.
# NOTE(review): presumably general term => more specific phrases — confirm
# against the Expansion class.
DEFAULT_EXPANSIONS =
{'food' => ['grocery', 'eat out', 'eating out', 'dining out', 'dine out', 'dine in'], 'music' => ['audio cd', 'audio tape'], 'movie' => ['blu-ray', 'dvd', 'video']}
# Matching strategy #parse uses when options[:matching] is not supplied.
DEFAULT_MATCHING =
:most_points

Instance Method Summary collapse

Constructor Details

#initialize(text, options = {}) ⇒ Natural

Returns a new instance of Natural.



28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/natural.rb', line 28

# Stores the normalized text and the options, picks a logger, and parses
# the text straight away.
#
# @param text [String] input sentence; runs of spaces are collapsed to one
#   and surrounding whitespace is stripped before it is stored
# @param options [Hash] settings kept in @options; options[:logger], when
#   truthy, replaces the default logger
def initialize(text, options = {})
  @text    = text.squeeze(' ').strip
  @options = options

  # Without a caller-supplied logger, log to STDOUT at DEBUG level.
  @logger = options[:logger] || Logger.new(STDOUT).tap { |log| log.level = Logger::DEBUG }

  @parse = parse
end

Instance Method Details

#answer ⇒ Object



129
130
131
132
133
134
# File 'lib/natural.rb', line 129

# Computes the answer for the parsed text.
#
# Starts from the non-blank entries that map_by_data (called with
# @options[:context]) yields for the tree's children, flattened. Each
# non-blank string from map_by_all_filters, then each non-blank string from
# map_by_aggregator, is applied by evaluating "result.<string>" and storing
# the outcome back into result.
#
# map_by_data, map_by_all_filters and map_by_aggregator are defined outside
# this section.
#
# NOTE(review): eval executes these strings as Ruby with this method's
# locals in scope. Presumably they are authored by fragment classes and
# never contain user text — confirm before feeding untrusted input.
#
# @return [Object] the value left in result after the last evaluation
def answer
  result = @parse.children.map_by_data(@options[:context]).select{|a| !a.blank?}.flatten
  @parse.children.map_by_all_filters.select{|a| !a.blank?}.each {|f| result = eval("result.#{f}")}
  @parse.children.map_by_aggregator.select{|a| !a.blank?}.each {|a| result = eval("result.#{a}")}
  result
end

#options=(options) ⇒ Object



47
48
49
50
# File 'lib/natural.rb', line 47

# Replaces the options hash and rebuilds the parse tree with it.
#
# @logger is left untouched: a :logger entry is only consulted by
# #initialize.
#
# @param options [Hash] new settings, stored in @options
# @return [Object] the tree returned by #parse
def options=(options)
  @options = options
  # #parse returns the memoized @parse while it is set, so the cached tree
  # must be dropped or the new options would never take effect.
  @parse = nil
  parse
end

#parse ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/natural.rb', line 52

# Builds the parse tree for @text and memoizes it in @parse.
#
# Steps:
# 1. Ask every candidate fragment class for its matches via klass.find.
# 2. Combine the matches into sequences with assemble_sequences and keep
#    the sequence whose summed score is highest.
# 3. Wrap every word not covered by that sequence in an Unused fragment.
# 4. Attach the fragments, ordered by first word id, to a root Fragment.
#
# Timing, scoring and tree details are written to @logger along the way.
#
# @return [Fragment] the root fragment; later calls return the memoized
#   tree until @parse is cleared
def parse
  return @parse if @parse

  start_at = Time.now

  # search for all possible matches using all the different fragment classes
  matches_by_class = {}
  fragment_classes = @options[:fragment_classes] || ObjectSpace.each_object(Class)
  fragment_classes = fragment_classes.select {|a| a < Natural::Fragment && a != Natural::Unused}
  find_options = {
    :text => @text,
    :matches => matches_by_class,
    :matching => @options[:matching] || DEFAULT_MATCHING,
    :spellings => @options[:spellings] || DEFAULT_SPELLINGS,
    :synonyms => @options[:synonyms] || DEFAULT_SYNONYMS,
    :expansions => @options[:expansions] || DEFAULT_EXPANSIONS
  }

  if find_options[:matching] == :first_match
    # once a match has been found, exclude those words from further consideration
    # can help speed things up, but requires you order the candidate fragment_classes carefully
    fragment_classes.each do |klass|
      new_options = find_options.dup
      new_options[:ignore] = matches_by_class.values.flatten.select{|a| a}.map_by_ids.flatten.uniq.sort
      # Run the search once per class; it was previously executed twice,
      # once for the nil check and once for the assignment.
      found = klass.find(new_options)[klass]
      matches_by_class[klass] = found if found
    end
  else
    ObjectSpace.each_object(Class).select {|a| a < Natural::Alternative}.each do |klass|
      matches_by_class = klass.find(find_options)
    end
    fragment_classes.each do |klass|
      matches_by_class = klass.find(find_options)
    end
  end

  matching_at = Time.now
  @logger.debug "[n][perf] matching took #{(matching_at - start_at).seconds.round(1)} seconds"

  # find all valid combinations, choose the one with the highest score
  sequences = assemble_sequences(matches_by_class.values.flatten)
  sequences = sequences.uniq.sort {|a,b| b.map_by_score.sum <=> a.map_by_score.sum}
  fragments = sequences.first || []

  scoring_at = Time.now
  @logger.debug "[n][perf] scoring took #{(scoring_at - matching_at).seconds.round(1)} seconds"
  @logger.debug "[n]"

  # tag the leftover words as unused
  words = @text.split(' ')
  all_ids = (0...words.size).to_a
  used_ids = fragments.blank? ? [] : fragments.map_by_ids.flatten
  (all_ids - used_ids).each do |id|
    # appends to sequences.first in place when a sequence was found, so the
    # [scor] log line for that sequence also lists the Unused fragments
    fragments << Unused.new(:ids => [id], :text => words[id])
  end

  # put the fragments we are using in order and assemble the final tree
  fragments = fragments.sort {|a,b| a.ids.first <=> b.ids.first}
  @parse = Fragment.new(:ids => all_ids, :text => @text)
  fragments.each {|a| @parse << a}

  sequences.each {|a| @logger.debug "[n][scor] #{a.map_by_score.sum.to_s.rjust(2, '0')} #{a.sort{|b,c| b.ids.first <=> c.ids.first}.join(' | ')}"}
  @logger.debug("[n]")
  @parse.pretty_to_s.each_line do |line|
    @logger.debug("[n][tree] #{line.gsub("\n", '')}")
  end
  @logger.debug("[n]")
  @logger.info("[n][orig] #{@text}" + (@options[:context] ? " (#{@options[:context]})" : ""))
  @logger.info("[n][used] #{interpretation}" + (@options[:context] ? " (#{@options[:context]})" : ""))

  @parse
end

#parse! ⇒ Object



124
125
126
127
# File 'lib/natural.rb', line 124

# Discards the memoized parse tree and parses again.
#
# #parse returns @parse for as long as it is set, so it is cleared first to
# force a fresh run.
#
# @return [Object] the value returned by #parse
def parse!
  @parse = nil
  parse
end

#text=(text) ⇒ Object



42
43
44
45
# File 'lib/natural.rb', line 42

# Replaces the input text and rebuilds the parse tree for it.
#
# @param text [String] new input sentence; runs of spaces are collapsed to
#   one and surrounding whitespace is stripped, as #initialize does
# @return [Object] the tree returned by #parse
def text=(text)
  # Keep @text in the same normalized form #initialize stores.
  @text = text.squeeze(' ').strip
  # #parse returns the memoized @parse while it is set, so the cached tree
  # must be dropped or the old text's tree would be returned.
  @parse = nil
  parse
end