Class: Ai4r::Clusterers::DBSCAN

Inherits:
Clusterer show all
Defined in:
lib/ai4r/clusterers/dbscan.rb

Overview

More about the DBSCAN algorithm: https://en.wikipedia.org/wiki/DBSCAN

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Data::Parameterizable

#get_parameters, included, #set_parameters

Constructor Details

#initialize ⇒ DBSCAN

Returns a new instance of DBSCAN.



23
24
25
26
27
28
29
30
# File 'lib/ai4r/clusterers/dbscan.rb', line 23

# Creates a clusterer with empty results and default parameters.
#
# +min_points+ defaults to 5. +epsilon+ and the custom distance function
# start out unset (nil); #build refuses to run while +epsilon+ is nil.
#
# @return [DBSCAN] a new instance of DBSCAN
def initialize
  super()
  # Result containers start out empty.
  @clusters = []
  @cluster_indices = []
  # Tunable parameters.
  @min_points = 5
  @epsilon = nil
  @distance_function = nil
end

Instance Attribute Details

#cluster_indices ⇒ Object (readonly)

Returns the value of attribute cluster_indices.



17
18
19
# File 'lib/ai4r/clusterers/dbscan.rb', line 17

# Reader for the per-cluster index lists.
#
# #build appends one array per cluster, seeded with the index of the item
# that started the cluster.
#
# @return [Array<Array<Integer>>] empty until #build has found a cluster
def cluster_indices = @cluster_indices

#clusters ⇒ Object (readonly)

Returns the value of attribute clusters.



17
18
19
# File 'lib/ai4r/clusterers/dbscan.rb', line 17

# Reader for the clusters found so far.
#
# #build pushes one Ai4r::Data::DataSet per cluster onto this array.
#
# @return [Array] empty until #build has found a cluster
def clusters = @clusters

#data_set ⇒ Object (readonly)

Returns the value of attribute data_set.



17
18
19
# File 'lib/ai4r/clusterers/dbscan.rb', line 17

# Reader for the data set handed to the most recent #build call.
#
# @return [Object, nil] the stored data set; nil while none has been stored
def data_set = @data_set

#labels ⇒ Object (readonly)

Returns the value of attribute labels.



17
18
19
# File 'lib/ai4r/clusterers/dbscan.rb', line 17

# Reader for the per-item labels assigned by #build.
#
# After a successful #build there is one entry per data item: either a
# cluster id in 1..number_of_clusters or the symbol :noise.
#
# @return [Array, nil] the labels; nil while no labels have been stored
def labels = @labels

#number_of_clusters ⇒ Object (readonly)

Returns the value of attribute number_of_clusters.



17
18
19
# File 'lib/ai4r/clusterers/dbscan.rb', line 17

# Reader for the number of clusters discovered by #build.
#
# #build resets the counter to 0 and increments it once per new cluster.
#
# @return [Integer, nil] the cluster count; nil while none has been stored
def number_of_clusters = @number_of_clusters

Instance Method Details

#build(data_set, _number_of_clusters = nil) ⇒ DBSCAN

Build a new clusterer using data from data_set. An optional number_of_clusters argument is ignored and present only to keep a consistent interface with other clusterers.

Parameters:

Returns:

Raises:

  • (ArgumentError)


39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/ai4r/clusterers/dbscan.rb', line 39

# Build a new clusterer using data from +data_set+.
#
# Items are visited in order. An item that has no label yet is either
# marked as :noise (fewer than +min_points+ neighbours, the item itself
# excluded) or becomes the seed of a new cluster, which is then grown
# through +extend_cluster+.
#
# Parameters are validated before any state is reset, so a rejected call
# leaves the results of a previous successful build untouched.
#
# @param data_set [#data_items, #data_labels] the items to cluster
#   (typically an Ai4r::Data::DataSet)
# @param _number_of_clusters [Object] ignored; present only to keep a
#   consistent interface with other clusterers
# @return [DBSCAN] self
# @raise [ArgumentError] if epsilon has not been defined
# @raise [RuntimeError] if the resulting state fails a consistency check
def build(data_set, _number_of_clusters = nil)
  # Fail fast: do not wipe earlier results when the call cannot succeed.
  raise ArgumentError, 'epsilon must be defined' if @epsilon.nil?

  @data_set = data_set
  @clusters = []
  @cluster_indices = []
  @labels = Array.new(data_set.data_items.size)
  @number_of_clusters = 0

  # Detect if the neighborhood of the current item
  # is dense enough
  data_set.data_items.each_with_index do |data_item, data_index|
    # Skip items that already carry a label.
    next unless @labels[data_index].nil?

    # NOTE(review): range_query is defined outside this block; it is
    # presumably returning item indices, since the current index is
    # subtracted from its result - confirm.
    neighbors = range_query(data_item) - [data_index]
    if neighbors.size < @min_points
      @labels[data_index] = :noise
    else
      # Open a new cluster seeded with the current item.
      @number_of_clusters += 1
      @labels[data_index] = @number_of_clusters
      ds = Ai4r::Data::DataSet.new(data_labels: @data_set.data_labels)
      ds << data_item
      @clusters.push(ds)
      @cluster_indices.push([data_index])
      extend_cluster(neighbors, @number_of_clusters)
    end
  end

  # Post-conditions guarding against inconsistent internal state.
  raise 'number_of_clusters must be positive' if !@clusters.empty? && @number_of_clusters <= 0

  # Range test instead of materialising and scanning an array of ids for
  # every label.
  cluster_ids = 1..@number_of_clusters
  labels_valid = @labels.all? do |label|
    label == :noise || (label.is_a?(Integer) && cluster_ids.cover?(label))
  end
  raise 'labels must be cluster ids or :noise' unless labels_valid

  self
end

#distance(a, b) ⇒ Object



88
89
90
91
92
93
94
95
# File 'lib/ai4r/clusterers/dbscan.rb', line 88

# Distance between two data items.
#
# Delegates to the custom distance function when one is set. Otherwise
# the Numeric attributes of each item are selected and passed to
# Ai4r::Data::Proximity.squared_euclidean_distance.
#
# @param a [Array] first data item
# @param b [Array] second data item
# @return [Object] whatever the distance function in use returns
def distance(a, b)
  if @distance_function
    @distance_function.call(a, b)
  else
    # Non-numeric attributes are dropped from each item independently.
    numeric_a = a.select { |value| value.is_a?(Numeric) }
    numeric_b = b.select { |value| value.is_a?(Numeric) }
    Ai4r::Data::Proximity.squared_euclidean_distance(numeric_a, numeric_b)
  end
end

#eval(_data_item) ⇒ Object

This algorithm cannot classify new data items once it has been built. Rebuild the clusterer with a data set that includes the new data item instead.

Parameters:

  • _data_item (Object)

Returns:

  • (Object)

Raises:

  • (NotImplementedError)


79
80
81
# File 'lib/ai4r/clusterers/dbscan.rb', line 79

# Classifying new data items is not supported by this clusterer; the
# call always fails.
#
# @param _data_item [Object] ignored
# @raise [NotImplementedError] always
def eval(_data_item)
  message = 'Eval of new data is not supported by this algorithm.'
  raise NotImplementedError, message
end

#supports_eval? ⇒ Boolean

Returns:

  • (Boolean)


84
85
86
# File 'lib/ai4r/clusterers/dbscan.rb', line 84

# Reports whether #eval can be used on this clusterer.
#
# @return [Boolean] always false
def supports_eval? = false