Class: DecisionTree::Node

Inherits:
Object
  • Object
show all
Defined in:
lib/decision-tree.rb

Instance Method Summary collapse

Constructor Details

#initialize(entries, columns = nil, algorithm = 'c45', dimension = nil, parent_node = nil, threshold = nil, path = nil) ⇒ Node

Returns a new instance of Node.



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/decision-tree.rb', line 37

# Builds a tree node from the given training entries and, unless a stopping
# condition is met, picks a feature to split on and grows the child nodes.
#
# @param entries [Array] training rows; each row is indexed with :features
#   and :label (presumably an Array of Hashes — confirm against callers)
# @param columns [Array, nil] column descriptors; when nil, names of the form
#   "feature_<i>" are generated for every dimension
# @param algorithm [String] either 'c45' or 'id3'
# @param dimension [Integer, nil] number of features; when nil it is taken
#   from the size of the first entry's :features
# @param parent_node [Object, nil] stored as-is
# @param threshold [Object, nil] stored as-is
# @param path [Array, nil] feature indexes already chosen on the way to this
#   node; the array passed in is kept (not copied) and appended to
# @raise [RuntimeError] when the algorithm is unknown or the number of
#   columns does not match the dimension
def initialize(entries, columns=nil, algorithm='c45', dimension=nil, parent_node=nil, threshold=nil, path=nil)
  @parent_node = parent_node
  @path = path.nil? ? Array.new : path
  @threshold = threshold

  unless algorithm == 'c45' || algorithm == 'id3'
    raise "Unknown algorithm"
  end
  @algorithm = algorithm

  @dimension = dimension.nil? ? entries[0][:features].size : dimension

  if columns.nil?
    @columns = @dimension.times.map { |index| "feature_#{index}" }
  elsif columns.size != @dimension
    raise "The number of columns is incorrect"
  else
    @columns = columns
  end

  @labels = entries.map { |entry| entry[:label] }
  @entropy = @labels.entropy
  @child_nodes = Hash.new

  # Stop growing once every feature has been used on this path, or when the
  # labels at this node have zero entropy.
  return if @path.size == @dimension || @entropy == 0.0

  @path << choose_best_feature(entries)

  # Only c45 treats 'num' features specially; every other case uses the
  # plain child builder.
  if @algorithm == 'c45' && feature_type == 'num'
    build_child_nodes_with_continuous_value(entries)
  else
    build_child_nodes(entries)
  end
end

Instance Method Details

#feature_index ⇒ Object



89
90
91
# File 'lib/decision-tree.rb', line 89

# Returns the most recently appended element of @path, i.e. the index of the
# feature this node splits on (nil when @path is empty).
def feature_index
  @path.last
end

#feature_name ⇒ Object



94
95
96
# File 'lib/decision-tree.rb', line 94

# Returns the name part of the current feature's column descriptor, i.e. the
# text before the first ':' in the column selected by the last @path entry.
def feature_name
  descriptor = @columns[@path.last]
  descriptor.split(':').first
end

#feature_type ⇒ Object



99
100
101
102
# File 'lib/decision-tree.rb', line 99

# Returns the type part of the current feature's column descriptor (the
# segment after the first ':'), falling back to 'string' when the descriptor
# carries no type.
def feature_type
  _name, declared_type = @columns[@path.last].split(':')
  declared_type || 'string'
end

#predict(vector, default = nil) ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/decision-tree.rb', line 140

# Walks the tree for the given feature vector and returns the prediction of
# the leaf that is reached.
#
# @param vector [Array] feature values, indexed by feature index
# @param default [Object] value returned when a categorical feature value has
#   no matching child node
# @return [Hash, Object] at a leaf, a Hash mapping each label to its relative
#   frequency among this node's labels (missing keys read as 0); otherwise
#   whatever the selected child returns, or +default+
def predict(vector, default=nil)
  if @child_nodes.size == 0
    total = @labels.size.to_f
    distribution = Hash.new(0)
    @labels.each { |label| distribution[label] += 1 }
    distribution.each_key { |label| distribution[label] = distribution[label] / total }
    return distribution
  end

  if @algorithm == 'c45' && feature_type == 'num'
    observed = vector[feature_index]
    ordered = @child_nodes.sort_by { |split_point, _node| split_point.to_f }

    # Advance past every split point that the observed value is not below;
    # the last one passed wins. If none is passed, use the lowest child.
    passed = ordered.drop(1).take_while do |split_point, _node|
      !(observed.to_f < split_point.to_f)
    end
    chosen = passed.empty? ? ordered.first : passed.last

    return chosen[1].predict(vector, default)
  end

  branch_key = vector[feature_index]
  return default unless @child_nodes.has_key?(branch_key)

  @child_nodes[branch_key].predict(vector, default)
end

#to_pseudo_code(buff = nil, indent = "") ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/decision-tree.rb', line 105

# Renders the subtree rooted at this node as lines of C-like pseudo code.
#
# A leaf emits a single "return" line: the label itself when all labels at
# the node agree, otherwise the full label list. A categorical split emits
# one "if(name == value){ ... }" block per child. A numeric c45 split emits
# an if / else if / else chain over the thresholds in DESCENDING order, so
# the branch taken is the child with the highest threshold not above the
# value, and the lowest child is the final "else". For a two-way split the
# output is a plain "if(name >= t){ ... } else{ ... }".
#
# @param buff [Array, nil] accumulator the lines are appended to; a new
#   Array is created when nil
# @param indent [String] prefix put in front of every emitted line
# @return [Array] the accumulator holding the generated lines
def to_pseudo_code(buff=nil,indent="")
  buff = [] if buff.nil?

  if @child_nodes.size == 0
    distinct = @labels.uniq
    buff << if distinct.size == 1
      "#{indent}return #{distinct[0]}"
    else
      "#{indent}return #{@labels}"
    end
    return buff
  end

  if @algorithm == 'c45' && feature_type == 'num'
    sorted_nodes = @child_nodes.sort_by { |key, _node| key.to_f }

    # Highest threshold first: with ascending, independent "if" blocks the
    # lowest threshold would capture every larger value as well.
    sorted_nodes.drop(1).reverse.each_with_index do |(feature_value, child_node), position|
      keyword = position == 0 ? 'if' : 'else if'
      buff << "#{indent}#{keyword}(#{feature_name} >= #{feature_value}){"
      child_node.to_pseudo_code(buff, indent + "  ")
      buff << "#{indent}}"
    end

    buff << "#{indent}else{"
    sorted_nodes[0][1].to_pseudo_code(buff, indent + "  ")
    buff << "#{indent}}"
  else
    @child_nodes.each do |feature_value, child_node|
      buff << "#{indent}if(#{feature_name} == #{feature_value}){"
      child_node.to_pseudo_code(buff, indent + "  ")
      buff << "#{indent}}"
    end
  end

  buff
end