Class: ENCODER

Inherits:
Object
  • Object
show all
Defined in:
lib/ENCODER.rb

Overview

███████╗███╗░░██╗░█████╗░░█████╗░██████╗░███████╗██████╗░
██╔════╝████╗░██║██╔══██╗██╔══██╗██╔══██╗██╔════╝██╔══██╗
█████╗░░██╔██╗██║██║░░╚═╝██║░░██║██║░░██║█████╗░░██████╔╝
██╔══╝░░██║╚████║██║░░██╗██║░░██║██║░░██║██╔══╝░░██╔══██╗
███████╗██║░╚███║╚█████╔╝╚█████╔╝██████╔╝███████╗██║░░██║
╚══════╝╚═╝░░╚══╝░╚════╝░░╚════╝░╚═════╝░╚══════╝╚═╝░░╚═╝

The encoder class is used to translate input characters and sentences into one-hot encoded vectors. Additionally, the encoder class contains functions to handle file reading, regex filtering, and conversions between ‘Matrix’ and ‘NArray’ types.

Direct Known Subclasses

DICTIONARY

Instance Method Summary collapse

Instance Method Details

#filter(sentance, regex = nil) ⇒ Object



81
82
83
84
85
86
87
88
# File 'lib/ENCODER.rb', line 81

# Removes unwanted text from a sentence.
#
# sentance - the String to clean.
# regex    - pattern whose matches are deleted. When nil, every character
#            other than ASCII letters, '.', ',' and the space is deleted.
#
# Returns a new String; the argument itself is not modified.
def filter(sentance, regex=nil)
	pattern = regex.nil? ? /[^A-Za-z\., ]/ : regex
	sentance.gsub(pattern, '')
end

#hotDecodeCharacter(vector) ⇒ Object



116
117
118
119
120
121
122
123
124
125
# File 'lib/ENCODER.rb', line 116

# Decodes a one-hot vector into the character stored at the same position
# of @CharMatrix.
#
# vector - any object responding to #to_a (a Vector or an Array of numbers).
#
# Returns the entry of @CharMatrix at the position of the largest value, or
# '@' when the vector has no elements. When several positions share the
# largest value, the highest of those positions is used, because the
# [value, index] pairs are compared as arrays.
def hotDecodeCharacter(vector)
	# Pair every value with its position and pick the largest pair.
	# `max` is nil for an empty vector; indexing it directly used to raise
	# NoMethodError and made the '@' fallback unreachable.
	strongest = vector.to_a.each_with_index.max
	return '@' if strongest.nil?

	@CharMatrix[strongest[1]]
end

#hotDecodeSentance(matrix) ⇒ Object



100
101
102
103
104
105
106
107
108
# File 'lib/ENCODER.rb', line 100

# Decodes a matrix of one-hot rows into a String.
#
# matrix - object responding to #row_count and #row(index); each row is
#          handed to hotDecodeCharacter.
#
# Returns the decoded characters concatenated in row order ("" when the
# matrix has no rows).
def hotDecodeSentance(matrix)
	(0...matrix.row_count).inject("") do |decoded, row_index|
		decoded + hotDecodeCharacter(matrix.row(row_index))
	end
end

#hotEncodeCharacter(char) ⇒ Object



109
110
111
112
113
114
115
# File 'lib/ENCODER.rb', line 109

# One-hot encodes a single character against @CharMatrix.
#
# char - the character to encode.
#
# Side effect: @SelectionMatrix is replaced by a fresh 1 x @Length matrix
# holding the encoding (all zeros when the character is unknown).
#
# Returns the encoding as a Vector, or nil when char is not in @CharMatrix.
# Previously an unknown character raised while assigning at a nil column;
# hotEncodeSentance in this file already tests the result for nil.
def hotEncodeCharacter(char)
	index = @CharMatrix.index(char)
	# Rebuild the selection matrix with only the matching column raised.
	# A nil index matches no column, which leaves the matrix all zeros.
	@SelectionMatrix = Matrix.build(1, @Length) { |_row, col| col == index ? 1 : 0 }
	return nil if index.nil?

	@SelectionMatrix.row(0)
end

#hotEncodeSentance(sentance) ⇒ Object



89
90
91
92
93
94
95
96
97
98
99
# File 'lib/ENCODER.rb', line 89

# One-hot encodes every character of a sentence.
#
# sentance - the String to encode.
#
# Each character is passed to hotEncodeCharacter; characters for which it
# returns nil are skipped.
#
# Returns a Matrix with one row per encoded character, in order, or an
# empty 0 x 0 Matrix when nothing was encoded.
def hotEncodeSentance(sentance)
	# Gather all rows first and build the Matrix once, instead of rebuilding
	# the whole Matrix for every character.
	encoded_rows = sentance.each_char
		.map { |letter| hotEncodeCharacter(letter) }
		.compact
		.map(&:to_a)
	Matrix.rows(encoded_rows)
end

#init(charmatrix = nil) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/ENCODER.rb', line 32

# Sets up the encoder state.
#
# charmatrix - optional Array of characters to encode against. When nil, a
#              default of 56 entries is used: '.', ',', space, '!', then
#              'a'..'z' and 'A'..'Z'.
#
# Assigns @Length (number of characters), @SelectionMatrix (a 1 x @Length
# Matrix of zeros) and @CharMatrix (the character list).
#
# Returns the character list that was stored in @CharMatrix.
def init(charmatrix = nil)
	charset = if charmatrix.nil?
		['.', ',', "\s", '!'] + ('a'..'z').to_a + ('A'..'Z').to_a
	else
		charmatrix
	end
	# The width is taken from the list itself, so it cannot drift from it.
	@Length = charset.length
	@SelectionMatrix = Matrix.build(1, @Length) { 0 } # 1 row, @Length columns
	@CharMatrix = charset
end

#listFilesInDirectory(path) ⇒ Object



55
56
57
58
# File 'lib/ENCODER.rb', line 55

# Lists the visible regular entries of a directory.
#
# path - directory to scan.
#
# Returns an Array of entry names (not full paths), in the order given by
# Dir.entries, leaving out names that start with '.' and entries that are
# directories.
def listFilesInDirectory(path)
	Dir.entries(path).reject do |name|
		# Dir.entries yields bare names, so the directory test has to be made
		# against the entry inside `path`, not the current working directory.
		name.start_with?('.') || File.directory?(File.join(path, name))
	end
end

#matrixToNArray(matrix) ⇒ Object



45
46
47
48
# File 'lib/ENCODER.rb', line 45

# Converts a Matrix-like object into a DFloat.
#
# matrix - object responding to #to_a; its rows are splatted into DFloat[].
#
# NOTE(review): DFloat is not defined in this method - presumably
# Numo::DFloat brought into scope elsewhere; confirm.
#
# Returns whatever DFloat[] builds from the rows.
def matrixToNArray(matrix)
	DFloat[*matrix.to_a]
end

#nArrayToMatrix(nArray) ⇒ Object



49
50
51
52
53
54
# File 'lib/ENCODER.rb', line 49

# Converts an array-like object into a Matrix.
#
# nArray - object responding to #to_a that yields an Array of row Arrays.
#
# Returns a Matrix built from those rows.
def nArrayToMatrix(nArray)
	row_arrays = nArray.to_a
	Matrix[*row_arrays]
end

#readFileDataArray(fileDataArray, index, start, input_len, predict_len) ⇒ Object



71
72
73
74
# File 'lib/ENCODER.rb', line 71

# Encodes a window of one previously loaded file.
#
# fileDataArray - Array of file contents (Strings).
# index         - which entry of fileDataArray to use.
# start         - offset of the window within the filtered text.
# input_len     - first part of the window length.
# predict_len   - second part of the window length.
#
# The entry is passed through filter with its default pattern, a slice of
# predict_len + input_len characters starting at `start` is taken, and the
# slice is passed to hotEncodeSentance.
#
# Returns the result of hotEncodeSentance for that slice.
def readFileDataArray(fileDataArray, index, start, input_len, predict_len)
	cleaned = filter(fileDataArray[index])
	window = cleaned[start, predict_len + input_len]
	hotEncodeSentance(window)
end

#readPlaintextfile(fileName) ⇒ Object



75
76
77
78
79
80
# File 'lib/ENCODER.rb', line 75

# Reads a whole file into memory.
#
# fileName - path of the file to read.
#
# Returns the file contents as a String.
# Raises the usual SystemCallError subclasses (e.g. Errno::ENOENT) when the
# file cannot be opened or read.
def readPlaintextfile(fileName)
	# File.read opens, reads and closes in one call, so the handle is
	# released even when reading fails part-way.
	File.read(fileName)
end

#readPlaintextFilesInDirectory(path) ⇒ Object



59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/ENCODER.rb', line 59

# Reads every visible regular file directly inside a directory.
#
# path - directory to scan.
#
# Entries whose name starts with '.' and entries that are directories are
# skipped. For each remaining entry a "reading file <path>/<name>" line is
# printed and the file is loaded through readPlaintextfile.
#
# Returns an Array with one element per file read, in Dir.entries order.
def readPlaintextFilesInDirectory(path)
	# Get names of all files in the folder. The directory test must be made
	# against the entry inside `path`: Dir.entries yields bare names, and
	# checking those against the working directory let sub-directories
	# through to the reader.
	file_names = Dir.entries(path).reject do |name|
		name.start_with?('.') || File.directory?(File.join(path, name))
	end

	file_names.map do |name|
		full_path = "#{path}/#{name}"
		puts "reading file " + full_path
		readPlaintextfile(full_path)
	end
end

#stringDifferencePercent(a, b) ⇒ Object



126
127
128
129
130
131
# File 'lib/ENCODER.rb', line 126

# Measures how much string b differs from string a.
#
# a - the String whose length is used as the divisor.
# b - the String compared against it, position by position.
#
# The number of non-matching positions is (length of the longer string -
# number of positions where both strings hold the same character); that
# count is divided by a.size.
#
# Returns a Float: 0.0 for identical strings (including two empty strings,
# which previously produced NaN). Because the divisor is a.size, the value
# exceeds 1.0 when b is more than twice as long as a, and is Infinity when
# only a is empty.
# NOTE(review): dividing by a.size rather than the longer length looks
# deliberate (error relative to a reference) - confirm with callers.
def stringDifferencePercent(a, b)
  longer = [a.size, b.size].max
  # Both strings empty: nothing differs, and 0 / 0.0 would be NaN.
  return 0.0 if longer.zero?

  same = a.each_char.zip(b.each_char).count { |left, right| left == right }
  (longer - same) / a.size.to_f
end