Module: RegularExpression::Compiler::X86
- Defined in:
- lib/regular_expression/compiler/x86.rb
Defined Under Namespace
Classes: Compiled
Class Method Summary collapse
-
.compile(cfg) ⇒ Object
Generate native code for a CFG.
Class Method Details
.compile(cfg) ⇒ Object
Generate native code for a CFG. This looks just like the Ruby generator but abstracted one level, or just like the interpreter but abstracted two levels!
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 |
# File 'lib/regular_expression/compiler/x86.rb', line 42 def self.compile(cfg) fisk = Fisk.new buffer = Fisk::Helpers.jitbuffer(1024) fisk.asm(buffer) do # Here we're setting up a couple of local variables that point to # registers so that it's easier to see what's actually going on # rax is a scratch register that is used for the return value of the # function return_value = rax # rcx is a scratch register that is used to track the index of the # string where we're currently looking string_index = rcx # rdx is a scratch register that is used to track the index of the # string where we've started the match match_index = rdx # rsp is a reserved register that stores a pointer to the stack stack_pointer = rsp # rbp is a reserved register that stores a pointer to the base of the # stack. It is also known as the frame pointer frame_pointer = rbp # rsi is a scratch register that stores the second argument to the # function, and in our case stores the length of the string string_length = rsi # rdi is a scratch register that stores the first argument to the # function, and in our case stores a pointer to the base of the string string_pointer = rdi # r8 is a scratch register that we're using to store the last read # character value from the string character_buffer = r8 # First we're going to do some initialization of the frame pointer and # stack pointer so we can clear the stack when we're done with this # function push frame_pointer mov frame_pointer, stack_pointer # Now we're going to initialize the counter to 0 so that we attempt to # match at each index of the input string xor match_index, match_index # This is the start of our loop, where at the beginning of the loop # we check if we have already finished looking at each index (in which # case we'll jump to a failure condition) make_label :start_loop_head cmp match_index, string_length jg label(:exit) # Set the string_index value to the match_index value so that we begin # each loop at the current match index mov string_index, match_index cfg.blocks.each do |block| # Label the start of each block so that we can jump between them make_label block.name block.insns.each do |insn| case insn when Bytecode::Insns::PushIndex push string_index when Bytecode::Insns::PopIndex pop string_index when Bytecode::Insns::GuardBegin cmp string_index, imm8(0) jne label(:exit) jmp label(cfg.exit_map[insn.guarded].name) when Bytecode::Insns::GuardEnd cmp string_index, string_length je label(cfg.exit_map[insn.guarded].name) when Bytecode::Insns::JumpAny no_match_label = :"no_match_#{insn.object_id}" # Ensure we have a character we can read cmp string_index, string_length je label(no_match_label) # Move the string index forward and jump to the target # instruction inc string_index jmp label(cfg.exit_map[insn.target].name) make_label no_match_label when Bytecode::Insns::JumpValue no_match_label = :"no_match_#{insn.object_id}" # Ensure we have a character we can read cmp string_index, string_length je label(no_match_label) # Read the character into the character buffer mov character_buffer, string_pointer add character_buffer, string_index mov character_buffer, m64(character_buffer) # Compare the character buffer to the instruction's character, # continue on to the next instruction if it's not equal cmp character_buffer, imm8(insn.char.ord) jne label(no_match_label) # Move the string index forward and jump to the target # instruction inc string_index jmp label(cfg.exit_map[insn.target].name) make_label no_match_label when Bytecode::Insns::JumpValuesInvert no_match_label = :"no_match_#{insn.object_id}" # Ensure we have a character we can read cmp string_index, string_length je label(no_match_label) # Read the character into the character buffer mov character_buffer, string_pointer add character_buffer, string_index mov character_buffer, m64(character_buffer) # Compare the character buffer to each of the instruction's # characters, continue on to the next instruction if any of them # are equal insn.chars.each do |value| cmp character_buffer, imm8(value.ord) je label(no_match_label) end # Move the string index forward and jump to the target # instruction inc string_index jmp label(cfg.exit_map[insn.target].name) make_label no_match_label when Bytecode::Insns::JumpRange no_match_label = :"no_match_#{insn.object_id}" # Ensure we have a character we can read cmp string_index, string_length je label(no_match_label) # Read the character into the character buffer mov character_buffer, string_pointer add character_buffer, string_index mov character_buffer, m64(character_buffer) # Compare the character buffer to the left hand side of the # instruction's range, continue on to the next instruction if # it's outside the range cmp character_buffer, imm8(insn.left.ord) jl label(no_match_label) # Compare the character buffer to the right hand side of the # instruction's range, continue on to the next instruction if # it's outside the range cmp character_buffer, imm8(insn.right.ord) jg label(no_match_label) # Move the string index forward and jump to the target # instruction inc string_index jmp label(cfg.exit_map[insn.target].name) make_label no_match_label when Bytecode::Insns::JumpRangeInvert no_match_label = :"no_match_#{insn.object_id}" match_label = :"match_#{insn.object_id}" # Ensure we have a character we can read cmp string_index, string_length je label(no_match_label) # Read the character into the character buffer mov character_buffer, string_pointer add character_buffer, string_index mov character_buffer, m64(character_buffer) # Compare the character buffer to the left hand side of the # instruction's range, jump down to the success case if it's # outside the range cmp character_buffer, imm8(insn.left.ord) jl label(match_label) # Compare the character buffer to the right hand side of the # instruction's range, continue on to the next instruction if # it's inside the range cmp character_buffer, imm8(insn.right.ord) jle label(no_match_label) # Move the string index forward and jump to the target # instruction make_label match_label inc string_index jmp label(cfg.exit_map[insn.target].name) make_label no_match_label when Bytecode::Insns::Jump jmp label(cfg.exit_map[insn.target].name) when Bytecode::Insns::Match # If we reach this instruction, then we've successfully matched # against the input string, so we're going to return the integer # that represents the index at which this match began mov return_value, match_index mov stack_pointer, frame_pointer pop frame_pointer ret when Bytecode::Insns::Fail inc match_index jmp label(:start_loop_head) else raise end end end # If we reach this instruction, then we've failed to match at every # possible index in the string, so we're going to return the length # of the string + 1 so that the caller knows that this match failed make_label :exit mov return_value, string_length inc return_value # Here we make sure to clean up after ourselves by returning the frame # pointer to its former position mov stack_pointer, frame_pointer pop frame_pointer ret end Compiled.new(buffer) end |