Method: FeatureSet::DataSet#build_features_for

Defined in:
lib/feature_set/data_set.rb

#build_features_for(data, opts = {}) ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/feature_set/data_set.rb', line 81

# Builds one output hash per row of +data+ by running every feature builder over every datum.
#
# We deliberately do NOT call before_build_features here: this method can be used on unknown rows
# for classification, and the feature builders must keep any data they cached during the previous
# 'build_features_from_data!' call. This matters for Wordvector, for example, which needs to build
# its idf mappings beforehand and re-use them on any new data.
#
# As a side effect, a "." is printed to STDERR for every 10th row (starting with the first) as a
# progress indicator.
#
# @param data [Enumerable] the rows to process; passed through self.class.wrap_dataset unless
#   opts[:already_wrapped] is truthy. Each row is iterated as key/datum pairs.
# @param opts [Hash] processing options
# @option opts [Boolean] :already_wrapped when truthy, +data+ is used as-is without wrapping
# @option opts [true, Hash] :include_original when +true+, each datum's +value+ is copied into the
#   output under its original key; when a Hash, the same happens for every key not listed under
#   its +:except+ entry (a single key or an array of keys)
# @return [Array<Hash>] one hash per row: the +:class+ entry is copied through untouched, and each
#   built feature is stored under the symbol "<key>_<feature>"
def build_features_for(data, opts = {})
  wrapped_data = opts[:already_wrapped] ? data : self.class.wrap_dataset(data)

  # The exclusion list does not depend on the row or key being visited, so resolve it once here
  # instead of rebuilding (and re-flattening) it for every single datum.
  include_original = opts[:include_original]
  excluded_keys =
    if include_original && !include_original.is_a?(TrueClass)
      [include_original[:except]].flatten
    else
      []
    end

  wrapped_data.map.with_index do |row, index|
    output_row = {}

    row.each do |key, datum|
      # The class label is carried over verbatim and never fed to the feature builders.
      if key == :class
        output_row[:class] = datum
        next
      end

      if include_original && !excluded_keys.include?(key)
        output_row[key] = datum.value
      end

      feature_builders.each do |builder|
        builder.build_features(datum, key, row).each do |feature, value|
          # Prefix with the source key so features from different columns cannot collide.
          output_row["#{key}_#{feature}".to_sym] = value
        end
      end
    end

    # Lightweight progress indicator: one dot per ten rows.
    if (index % 10).zero?
      STDERR.print "."
      STDERR.flush
    end

    output_row
  end
end