Class: ENCODER
- Inherits:
-
Object
- Object
- ENCODER
- Defined in:
- lib/ENCODER.rb
Overview
███████╗███╗░░██╗░█████╗░░█████╗░██████╗░███████╗██████╗░
██╔════╝████╗░██║██╔══██╗██╔══██╗██╔══██╗██╔════╝██╔══██╗
█████╗░░██╔██╗██║██║░░╚═╝██║░░██║██║░░██║█████╗░░██████╔╝
██╔══╝░░██║╚████║██║░░██╗██║░░██║██║░░██║██╔══╝░░██╔══██╗
███████╗██║░╚███║╚█████╔╝╚█████╔╝██████╔╝███████╗██║░░██║
╚══════╝╚═╝░░╚══╝░╚════╝░░╚════╝░╚═════╝░╚══════╝╚═╝░░╚═╝

The encoder class is used to translate input characters and sentences into one-hot encoded vectors. Additionally, the encoder class contains functions to handle file reading, regex filtering, and conversions between ‘Matrix’ and ‘NArray’ types.
Direct Known Subclasses
Instance Method Summary
- #filter(sentance, regex = nil) ⇒ Object
- #hotDecodeCharacter(vector) ⇒ Object
- #hotDecodeSentance(matrix) ⇒ Object
- #hotEncodeCharacter(char) ⇒ Object
- #hotEncodeSentance(sentance) ⇒ Object
- #init(charmatrix = nil) ⇒ Object
- #listFilesInDirectory(path) ⇒ Object
- #matrixToNArray(matrix) ⇒ Object
- #nArrayToMatrix(nArray) ⇒ Object
- #readFileDataArray(fileDataArray, index, start, input_len, predict_len) ⇒ Object
- #readPlaintextfile(fileName) ⇒ Object
- #readPlaintextFilesInDirectory(path) ⇒ Object
- #stringDifferencePercent(a, b) ⇒ Object
Instance Method Details
#filter(sentance, regex = nil) ⇒ Object
81 82 83 84 85 86 87 88 |
# File 'lib/ENCODER.rb', line 81

# Removes unwanted characters from a string.
#
# @param sentance [String] text to clean
# @param regex [Regexp, String, nil] pattern whose matches are deleted; when
#   nil, every character other than ASCII letters, '.', ',' and space is
#   deleted
# @return [String] a new string with all matches removed (the argument is
#   not modified)
def filter(sentance, regex = nil)
  pattern = regex.nil? ? /[^A-Za-z\., ]/ : regex
  sentance.gsub(pattern, '')
end
#hotDecodeCharacter(vector) ⇒ Object
116 117 118 119 120 121 122 123 124 125 |
# File 'lib/ENCODER.rb', line 116

# Decodes a single (one-hot or probability) vector back into a character.
#
# The position of the largest element selects the entry of @CharMatrix.
# Elements are compared as [value, index] pairs, so when several elements
# share the maximum value the highest index wins.
#
# @param vector [#to_a] vector of comparable numbers
# @return [String] the decoded character, or '@' when the vector is empty or
#   the winning index has no entry in @CharMatrix
def hotDecodeCharacter(vector)
  # `max` yields nil for an empty vector; destructuring nil leaves index nil.
  _value, index = vector.to_a.each_with_index.max
  return '@' if index.nil?

  @CharMatrix[index] || '@'
end
#hotDecodeSentance(matrix) ⇒ Object
100 101 102 103 104 105 106 107 108 |
# File 'lib/ENCODER.rb', line 100

# Decodes a matrix of encoded characters (one row per character) into a
# string by passing every row to #hotDecodeCharacter and joining the results.
#
# @param matrix [Matrix] matrix whose rows are encoded characters
# @return [String] the decoded text; "" for a matrix with no rows
def hotDecodeSentance(matrix)
  # Collect then join once instead of growing a string with `+` per row.
  matrix.row_vectors.map { |row| hotDecodeCharacter(row) }.join
end
#hotEncodeCharacter(char) ⇒ Object
109 110 111 112 113 114 115 |
# File 'lib/ENCODER.rb', line 109

# One-hot encodes a single character against @CharMatrix.
#
# As a side effect @SelectionMatrix is replaced by the freshly built
# 1 x @Length matrix for a known character.
#
# @param char [String] character to encode
# @return [Vector, nil] vector of @Length integers with a 1 at the
#   character's position in @CharMatrix, or nil when the character is not
#   part of @CharMatrix (#hotEncodeSentance skips nil results)
def hotEncodeCharacter(char)
  index = @CharMatrix.index(char)
  return nil if index.nil?

  # Build the row in one go rather than mutating a zero matrix in place.
  @SelectionMatrix = Matrix.build(1, @Length) { |_row, col| col == index ? 1 : 0 }
  @SelectionMatrix.row(0)
end
#hotEncodeSentance(sentance) ⇒ Object
89 90 91 92 93 94 95 96 97 98 99 |
# File 'lib/ENCODER.rb', line 89

# One-hot encodes a string, producing one matrix row per encodable character.
#
# Each character is passed to #hotEncodeCharacter; characters for which that
# method returns nil are skipped.
#
# @param sentance [String] text to encode
# @return [Matrix] matrix with one row per encoded character; a 0x0 matrix
#   when nothing was encoded
def hotEncodeSentance(sentance)
  # Gather all rows first and build the matrix once; rebuilding it for every
  # character is quadratic in the sentence length.
  rows = sentance.chars
                 .map { |char| hotEncodeCharacter(char) }
                 .compact
                 .map(&:to_a)
  Matrix.rows(rows)
end
#init(charmatrix = nil) ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/ENCODER.rb', line 32

# Sets up the character set used for encoding and decoding.
#
# Assigns @Length (number of characters), @SelectionMatrix (a 1 x @Length
# matrix of zeros) and @CharMatrix (the character set itself).
#
# @param charmatrix [Array<String>, nil] custom character set; when nil the
#   default set of 56 characters is used: '.', ',', space, '!', then 'a'..'z',
#   then 'A'..'Z'
# @return [Array<String>] the character set now in use
def init(charmatrix = nil)
  chars = charmatrix.nil? ? ['.', ',', ' ', '!'] + [*'a'..'z'] + [*'A'..'Z'] : charmatrix

  # Derive the size from the character set so the two can never disagree.
  @Length = chars.length
  @SelectionMatrix = Matrix.build(1, @Length) { 0 }
  @CharMatrix = chars
end
#listFilesInDirectory(path) ⇒ Object
55 56 57 58 |
# File 'lib/ENCODER.rb', line 55

# Lists the names of the visible regular entries directly inside a directory.
#
# Entries whose name starts with '.' (including '.' and '..') and
# subdirectories are left out.
#
# @param path [String] directory to scan
# @return [Array<String>] entry names relative to path, in the order
#   reported by Dir.entries
def listFilesInDirectory(path)
  Dir.entries(path).reject do |name|
    # Resolve the entry against `path`; testing the bare name would check
    # the current working directory instead.
    name.start_with?('.') || File.directory?(File.join(path, name))
  end
end
#matrixToNArray(matrix) ⇒ Object
45 46 47 48 |
# File 'lib/ENCODER.rb', line 45

# Converts a Matrix into a DFloat array built from the same rows.
#
# NOTE(review): DFloat is presumably Numo::DFloat brought into scope
# elsewhere in the file -- confirm.
#
# @param matrix [#to_a] matrix whose rows are passed on to DFloat
# @return [DFloat] result of DFloat[] applied to the matrix rows
def matrixToNArray(matrix)
  rows = matrix.to_a
  DFloat[*rows]
end
#nArrayToMatrix(nArray) ⇒ Object
49 50 51 52 53 54 |
# File 'lib/ENCODER.rb', line 49

# Converts a two-dimensional array-like object into a Matrix.
#
# @param nArray [#to_a] object whose #to_a yields an array of row arrays
# @return [Matrix] matrix with the same rows
def nArrayToMatrix(nArray)
  Matrix.rows(nArray.to_a)
end
#readFileDataArray(fileDataArray, index, start, input_len, predict_len) ⇒ Object
71 72 73 74 |
# File 'lib/ENCODER.rb', line 71

# Encodes a window of one file's filtered text.
#
# The text at fileDataArray[index] is cleaned with #filter (default
# pattern), a slice of input_len + predict_len characters beginning at
# start is taken, and the slice is encoded with #hotEncodeSentance.
#
# @param fileDataArray [Array<String>] file contents
# @param index [Integer] which file to read from
# @param start [Integer] offset of the window within the filtered text
# @param input_len [Integer] number of input characters
# @param predict_len [Integer] number of characters to predict
# @return [Object] whatever #hotEncodeSentance returns for the window
def readFileDataArray(fileDataArray, index, start, input_len, predict_len)
  window = filter(fileDataArray[index])[start, input_len + predict_len]

  # String#[] yields nil once `start` lies beyond the end of the text;
  # treat that like an empty window instead of passing nil on.
  hotEncodeSentance(window || '')
end
#readPlaintextfile(fileName) ⇒ Object
75 76 77 78 79 80 |
# File 'lib/ENCODER.rb', line 75

# Reads a whole file into a string.
#
# @param fileName [String] path of the file to read
# @return [String] the file's contents
# @raise [SystemCallError] if the file cannot be opened or read
def readPlaintextfile(fileName)
  # File.read closes the handle even when reading fails.
  File.read(fileName)
end
#readPlaintextFilesInDirectory(path) ⇒ Object
59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/ENCODER.rb', line 59

# Reads every visible regular file directly inside a directory.
#
# Entries whose name starts with '.' and subdirectories are skipped. A
# "reading file <path>/<name>" line is printed for each file, which is then
# loaded with #readPlaintextfile.
#
# @param path [String] directory to read from
# @return [Array<String>] contents of each file, in the order reported by
#   Dir.entries
def readPlaintextFilesInDirectory(path)
  file_names = Dir.entries(path).reject do |name|
    # Resolve the entry against `path`; testing the bare name would check
    # the current working directory and let subdirectories through.
    name.start_with?('.') || File.directory?(File.join(path, name))
  end

  file_names.map do |name|
    full_path = "#{path}/#{name}"
    puts "reading file #{full_path}"
    readPlaintextfile(full_path)
  end
end
#stringDifferencePercent(a, b) ⇒ Object
126 127 128 129 130 131 |
# File 'lib/ENCODER.rb', line 126

# Measures how different two strings are, position by position.
#
# Characters are compared at equal positions; every position of the longer
# string that has no equal counterpart counts as a difference.
#
# @param a [String] first string
# @param b [String] second string
# @return [Float] fraction of differing positions relative to the longer
#   string, from 0.0 (identical) to 1.0 (nothing matches); 0.0 when both
#   strings are empty
def stringDifferencePercent(a, b)
  longer = [a.size, b.size].max
  return 0.0 if longer.zero?

  same = a.chars.zip(b.chars).count { |left, right| left == right }

  # Normalise by the longer length so the result stays within 0.0..1.0
  # regardless of which argument is the longer one.
  (longer - same) / longer.to_f
end