Class: Ncd

Inherits:
Object
  • Object
show all
Defined in:
ext/lib/CompLearnLib/Ncd.rb

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Ncd

Returns a new instance of Ncd.



19
20
21
22
# File 'ext/lib/CompLearnLib/Ncd.rb', line 19

# Sets up a new Ncd instance.
#
# Stores the object returned by CLConfig.getDefaultConfig in @config; the
# other methods consult it (via isSymmetric?) when deciding which
# compression tasks to build.
def initialize
    # for now
    @config = CLConfig.getDefaultConfig
end

Instance Method Details

#buildPairs(list) ⇒ Object

build singles and pairs to compress from the given list



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'ext/lib/CompLearnLib/Ncd.rb', line 93

# Build the singles and pairs to compress for the given list.
#
# list: the objects that are to be compared with each other
#
# returns: an array of compression tasks. For every object x it contains
#          the single task [x] followed by the pair tasks [x, y].
#
# Every ordered pair is emitted exactly once. When the configuration is
# symmetric only one ordering of each pair is emitted (y at or after x in
# the list), because compress copies the result of [x, y] onto the key of
# [y, x] in that mode. Otherwise both orderings are emitted, each once.
def buildPairs(list)
    cList = []
    list.each_with_index { |item, index|
        cList.push([item])
        # symmetric: partners start at the item itself, so the diagonal
        # pair [x, x] is kept and no reversed duplicate is generated
        partners = @config.isSymmetric? ? list.drop(index) : list
        partners.each { |item2|
            cList.push([item, item2])
        }
    }
    cList
end

#buildSinglePairs(a, list) ⇒ Object

build singles and pairs with the given object



114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'ext/lib/CompLearnLib/Ncd.rb', line 114

# Build the singles and pairs needed to compare one object with a list.
#
# a:    the object every list entry is paired with
# list: the objects to pair with a
#
# returns: an array of compression tasks; for each list entry x it holds
#          [x] and [a, x], plus [x, a] when the configuration is not
#          symmetric. Note that the single task [a] is not included.
def buildSinglePairs(a, list)
    tasks = []
    list.each do |entry|
        tasks << [entry] << [a, entry]
        # the reversed pair is only needed when order matters
        tasks << [entry, a] unless @config.isSymmetric?
    end
    tasks
end

#compress(list) ⇒ Object

execute the list of given compression tasks

list: a list of objects to compress of the

form [[object1],[object1,object2],..]

returns: a hash of compressed sizes, the key to

the hash is the id (or concatenation of ids)
of the compressible object(s)


54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'ext/lib/CompLearnLib/Ncd.rb', line 54

# Execute the given list of compression tasks.
#
# list: tasks of the form [[object1], [object1, object2], ..]
#
# returns: a hash of results keyed by the idTotal of each CompressionTask.
#          When the configuration is symmetric, every multi-element task
#          additionally gets an entry under the joined ids of its reversed
#          elements, holding the same value.
def compress(list)
    sizes = {}
    pending = list.size

    # dispatch compression tasks; each callback records one result
    TaskMaster.init
    list.each do |item|
        task = CompressionTask.new(item)
        TaskMaster.enqueue(task) do |size, finished, _src|
            # XXX put in resv chr
            sizes[finished.idTotal] = size
            pending -= 1
        end
    end

    # harvest results until every dispatched task has reported back
    TaskMaster.waitForReply until pending == 0

    # add vice versa compression if symmetric
    if @config.isSymmetric?
        list.each do |item|
            next unless item.size > 1
            forward_key = item.join
            sizes[item.reverse.join] = sizes[forward_key]
        end
    end

    sizes
end

#ncd(ca, cb, cab, cba) ⇒ Object

get the ncd given pre compressed input

ca: size of compressed a; cb: size of compressed b; cab: size of compressed ab; cba: size of compressed ba

returns: ncd of a and b



36
37
38
39
40
41
42
# File 'ext/lib/CompLearnLib/Ncd.rb', line 36

# Get the ncd from pre-compressed sizes.
#
# ca:  size of compressed a
# cb:  size of compressed b
# cab: size of compressed ab
# cba: size of compressed ba
#
# returns: max(cba - cb, cab - ca) divided by max(ca, cb), as a Float
def ncd(ca, cb, cab, cba)
    # denominator: the larger of the two single sizes
    if ca < cb
        largest_single = cb
    else
        largest_single = ca
    end

    # numerator: the larger of the two size increases
    extra_ba = cba - cb
    extra_ab = cab - ca
    if extra_ba < extra_ab
        largest_extra = extra_ab
    else
        largest_extra = extra_ba
    end

    largest_extra.to_f / largest_single.to_f
end

#ncdFile(a, b) ⇒ Object

calculate ncd from 2 files



155
156
157
158
159
# File 'ext/lib/CompLearnLib/Ncd.rb', line 155

# Calculate the ncd of two files.
#
# a, b: arguments handed to CompressionObject.fromFile, one per file
#
# returns: the result of ncdSingle for the two compression objects
def ncdFile(a, b)
    first = CompressionObject.fromFile(a)
    second = CompressionObject.fromFile(b)
    ncdSingle(first, second)
end

#ncdMatrix(list) ⇒ Object

calculate the mutual ncd of all objects in the list



211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# File 'ext/lib/CompLearnLib/Ncd.rb', line 211

# Calculate the mutual ncd of all objects in the list.
#
# list: the objects to compare; each one is interpolated into a string to
#       form the lookup key into the compression results
#
# returns: an array of rows, one per object a, each row holding the ncd of
#          a with every object b of the list (in list order)
def ncdMatrix(list)
    # precompress everything necessary
    sizes = compress(buildPairs(list))

    # one row per a, one column per b
    list.map do |a|
        list.map do |b|
            ncd(sizes["#{a}"], sizes["#{b}"], sizes["#{a}#{b}"], sizes["#{b}#{a}"])
        end
    end
end

#ncdMatrixFile(list) ⇒ Object

calculate the mutual ncd of a list of files



240
241
242
243
244
245
# File 'ext/lib/CompLearnLib/Ncd.rb', line 240

# Calculate the mutual ncd of a list of files.
#
# list: arguments handed to CompressionObject.fromFile, one per file
#
# returns: the result of ncdMatrix for the resulting compression objects
def ncdMatrixFile(list)
    objects = list.map { |file| CompressionObject.fromFile(file) }
    ncdMatrix(objects)
end

#ncdMatrixString(list) ⇒ Object

calculate the mutual ncd of a list of strings



230
231
232
233
234
235
# File 'ext/lib/CompLearnLib/Ncd.rb', line 230

# Calculate the mutual ncd of a list of strings.
#
# list: the strings to compare; each is wrapped with
#       CompressionObject.fromString
#
# returns: the result of ncdMatrix for the resulting compression objects
def ncdMatrixString(list)  # most caching benefit
    objects = list.map { |text| CompressionObject.fromString(text) }
    ncdMatrix(objects)
end

#ncdSingle(a, b) ⇒ Object

calculate ncd from two cObjects



133
134
135
136
137
138
139
140
141
# File 'ext/lib/CompLearnLib/Ncd.rb', line 133

# Calculate the ncd of two compression objects.
#
# a, b: the objects to compare; each one is interpolated into a string to
#       form the lookup key into the compression results
#
# returns: the ncd of a and b
def ncdSingle(a, b)
    symmetric = @config.isSymmetric?

    # the reversed pair only has to be compressed when order matters
    tasks = [[a], [b], [a, b]]
    tasks << [b, a] unless symmetric
    sizes = compress(tasks)

    # symmetric: reuse the size of ab for ba
    sizes["#{b}#{a}"] = sizes["#{a}#{b}"] if symmetric

    ncd(sizes["#{a}"], sizes["#{b}"], sizes["#{a}#{b}"], sizes["#{b}#{a}"])
end

#ncdString(a, b) ⇒ Object

calculate ncd from 2 strings



146
147
148
149
150
# File 'ext/lib/CompLearnLib/Ncd.rb', line 146

# Calculate the ncd of two strings.
#
# a, b: the strings to compare; each string is passed to
#       CompressionObject.fromString as both the first and the second
#       argument
#
# returns: the result of ncdSingle for the two compression objects
def ncdString(a, b)
    first = CompressionObject.fromString(a, a)
    second = CompressionObject.fromString(b, b)
    ncdSingle(first, second)
end

#ncdVector(a, list) ⇒ Object

calculate the ncd between one object and a list of others



167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'ext/lib/CompLearnLib/Ncd.rb', line 167

# Calculate the ncd between one object and a list of others.
#
# a:    the object compared against every list entry
# list: the objects to compare with a
#
# returns: an array with the ncd of a and each list entry, in list order
def ncdVector(a, list)
    # precompress everything necessary; buildSinglePairs leaves out the
    # single task for a itself, so it is appended here
    tasks = buildSinglePairs(a, list)
    tasks << [a]
    sizes = compress(tasks)

    # calculate ncd of a with everything else
    list.map do |b|
        ncd(sizes["#{a}"], sizes["#{b}"], sizes["#{a}#{b}"], sizes["#{b}#{a}"])
    end
end

#ncdVectorFile(a, list) ⇒ Object

calculate the mutual ncd of a file with a list of files



197
198
199
200
201
202
203
# File 'ext/lib/CompLearnLib/Ncd.rb', line 197

# Calculate the ncd of one file with each file of a list.
#
# a:    argument handed to CompressionObject.fromFile for the reference file
# list: arguments handed to CompressionObject.fromFile for the other files
#
# returns: the result of ncdVector for the resulting compression objects
def ncdVectorFile(a, list)
    reference = CompressionObject.fromFile(a)
    others = list.map { |file| CompressionObject.fromFile(file) }
    ncdVector(reference, others)
end

#ncdVectorString(a, list) ⇒ Object

calculate the mutual ncd of a string with a list of strings



185
186
187
188
189
190
191
# File 'ext/lib/CompLearnLib/Ncd.rb', line 185

# Calculate the ncd of one string with each string of a list.
#
# a:    the reference string
# list: the strings to compare with a
#
# Every string is wrapped with CompressionObject.fromString first.
#
# returns: the result of ncdVector for the resulting compression objects
def ncdVectorString(a, list)
    reference = CompressionObject.fromString(a)
    others = list.map { |text| CompressionObject.fromString(text) }
    ncdVector(reference, others)
end