Module: RegularExpression::CFG

Defined in:
lib/regular_expression/cfg.rb

Overview

The CFG is a directed graph of extended basic blocks of bytecode instructions. This module has objects to represent the EBB, a graph object which contains a set of EBB, and a builder that creates a CFG from a compiled bytecode object.

Defined Under Namespace

Classes: ExtendedBasicBlock, Graph

Class Method Summary collapse

Class Method Details

.build(compiled) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/regular_expression/cfg.rb', line 9

def self.build(compiled)
  # Each label in the compiled bytecode starts a block, as does the first
  # instruction
  all_blocks = { start: 0 }.merge(compiled.labels)
  all_block_addresses = all_blocks.values

  # We're going to create a potentially larger map of labels, and we'll be
  # maintaining a reverse map as well.
  all_labels = compiled.labels.dup
  all_labels_reverse = all_labels.invert

  # These are the blocks we're finding - indexed by their start address.
  blocks = {}

  # Go through each block.
  all_blocks.each do |name, start_n|
    # We're going to collect up the instructions in the block, and the
    # labels it exits to.
    block_insns = []
    block_exits = Set.new

    insn_n = start_n

    loop do
      # Does another instruction jump here? If so it's the end of the EBB,
      # as EBBs have only one entry point.
      if insn_n != start_n && all_block_addresses.include?(insn_n)
        # As the EBB ends here - we should jump to the next EBB.
        target = all_labels_reverse[insn_n]
        unless target
          target = :"extra#{insn_n}"
          all_labels[target] = insn_n
          all_labels_reverse[insn_n] = target
        end
        block_insns.push(Bytecode::Insns::Jump.new(target))
        block_exits.add(target)
        break
      end

      # Examine each instruction.
      insn = compiled.insns[insn_n]
      block_insns.push(insn)

      # Remember which blocks exit to this target.
      case insn
      when Bytecode::Insns::PushIndex, Bytecode::Insns::PopIndex
        insn_n += 1
      when Bytecode::Insns::GuardBegin, Bytecode::Insns::GuardEnd
        block_exits.add(insn.guarded)
        insn_n += 1
      when Bytecode::Insns::JumpAny, Bytecode::Insns::JumpValuesInvert,
            Bytecode::Insns::JumpRange, Bytecode::Insns::JumpRangeInvert,
            Bytecode::Insns::JumpValue
        block_exits.add(insn.target)
        insn_n += 1
      when Bytecode::Insns::Jump
        block_exits.add(insn.target)
        break
      when Bytecode::Insns::Match, Bytecode::Insns::Fail
        break
      else
        raise
      end
    end

    blocks[start_n] = ExtendedBasicBlock.new(name, block_insns, block_exits.to_a)
  end

  # Create a map of jump target labels to the blocks that contain them.
  exit_map = {}
  blocks.each_value do |block|
    block.exits.each do |exit|
      exit_map[exit] ||= blocks[all_labels[exit]]
    end
  end

  Graph.new(blocks.values, exit_map)
end

.to_dot(cfg) ⇒ Object



88
89
90
91
92
93
94
# File 'lib/regular_expression/cfg.rb', line 88

def self.to_dot(cfg)
  graph = Graphviz::Graph.new
  cfg.to_dot(graph)

  Graphviz.output(graph, path: "build/cfg.svg", format: "svg")
  graph.to_dot
end