Class: NodeMarshal

Inherits:
Object
  • Object
show all
Defined in:
lib/node-marshal.rb,
ext/node-marshal/nodedump.c

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#new(: srcfile, filename) ⇒ Object #new(: binfile, filename) ⇒ Object #new(: srcmemory, srcstr) ⇒ Object #new(: binmemory, binstr) ⇒ Object

Creates a NodeMarshal instance from source code or from a dumped syntax tree (NODEs), i.e. preparsed and packed source code. The created object can be used either to execute the code or to save it in preparsed form (useful for code obfuscation/protection).



1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
# File 'ext/node-marshal/nodedump.c', line 1615

/*
 * Implementation of NodeMarshal.new(source, info).
 *
 * source -- Symbol selecting how info is interpreted:
 *   :srcfile   -- info is handed to m_nodedump_from_source
 *   :srcmemory -- info is handed to m_nodedump_from_string
 *   :binmemory -- info is handed to m_nodedump_from_memory
 *   :binfile   -- info is read with File.binread and the resulting string
 *                 is handed to m_nodedump_from_memory
 *
 * Resets @show_offsets to false before dispatching. Check_Type rejects a
 * non-Symbol source; an unknown symbol raises ArgumentError.
 */
static VALUE m_nodedump_init(VALUE self, VALUE source, VALUE info)
{
  ID id_usr;
  rb_iv_set(self, "@show_offsets", Qfalse);
  Check_Type(source, T_SYMBOL);
  id_usr = SYM2ID(source);
  if (id_usr == rb_intern("srcfile"))
  {
    return m_nodedump_from_source(self, info);
  }
  else if (id_usr == rb_intern("srcmemory"))
  {
    return m_nodedump_from_string(self, info);
  }
  else if (id_usr == rb_intern("binmemory"))
  {
    return m_nodedump_from_memory(self, info);
  }
  else if (id_usr == rb_intern("binfile"))
  {
    // Load the whole dump into memory and reuse the :binmemory path
    VALUE cFile = rb_const_get(rb_cObject, rb_intern("File"));
    VALUE bin = rb_funcall(cFile, rb_intern("binread"), 1, info);
    return m_nodedump_from_memory(self, bin);
  }
  else
  {
    rb_raise(rb_eArgError, "Invalid source type (it must be :srcfile, :srcmemory, :binmemory or :binfile)");
  }
  return Qnil; // not reached: rb_raise does not return
}

Class Method Details

.base85r_decode(input) ⇒ Object

Decode ASCII string in the modified BASE85 format to the binary string (useful for obfuscation of .rb source files)



1977
1978
1979
1980
# File 'ext/node-marshal/nodedump.c', line 1977

/* Class-method wrapper: forwards input to base85r_decode. The receiver is not used. */
static VALUE m_base85r_decode(VALUE obj, VALUE input)
{
  VALUE decoded = base85r_decode(input);
  (void) obj;
  return decoded;
}

.base85r_encode(input) ⇒ Object

Encode arbitrary binary string to the ASCII string using modified version of BASE85 (useful for obfuscation of .rb source files)



1964
1965
1966
1967
# File 'ext/node-marshal/nodedump.c', line 1964

/* Class-method wrapper: forwards input to base85r_encode. The receiver is not used. */
static VALUE m_base85r_encode(VALUE obj, VALUE input)
{
  VALUE encoded = base85r_encode(input);
  (void) obj;
  return encoded;
}

.compile_rb_file(outfile, inpfile, *args) ⇒ Object

Compiles the Ruby source file inpfile into outfile, which contains the compressed AST node and its loader. This function is a wrapper around NodeMarshal#to_compiled_rb.



91
92
93
94
95
# File 'lib/node-marshal.rb', line 91

# Builds a NodeMarshal from the source file +inpfile+ and writes its
# compiled form to +outfile+ via #to_compiled_rb; any extra arguments are
# forwarded unchanged. Always returns true.
def self.compile_rb_file(outfile, inpfile, *args)
  NodeMarshal.new(:srcfile, inpfile).to_compiled_rb(outfile, *args)
  true
end

Instance Method Details

#change_literal(old_lit, new_lit) ⇒ Object

Update the array with the list of literals (to be used for code obfuscation) Warning! This function is a stub!



1512
1513
1514
1515
1516
# File 'ext/node-marshal/nodedump.c', line 1512

/*
 * Placeholder for literal replacement: not implemented yet.
 * Both arguments are ignored and the receiver is returned unchanged.
 */
static VALUE m_nodedump_change_literal(VALUE self, VALUE old_lit, VALUE new_lit)
{
    (void) old_lit;
    (void) new_lit;
    /* TO BE IMPLEMENTED */
    return self;
}

#change_symbol(old_sym, new_sym) ⇒ Object

Replace one symbol by another (to be used for code obfuscation)

  • old_sym – String that contains symbol name to be replaced

  • new_sym – String that contains new name of the symbol



1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
# File 'ext/node-marshal/nodedump.c', line 1424

/*
 * Replaces the name old_sym by new_sym in the :symbols array of the
 * preparsed hash stored in @nodehash.
 *
 * old_sym, new_sym -- Strings with the current and the new symbol name.
 *
 * Returns self when the name was replaced and nil when old_sym is not in
 * the table. Raises ArgumentError when @nodehash is not initialized, when
 * an argument is not a String, when the hash has no :symbols entry, or
 * when new_sym is already present in the table.
 */
static VALUE m_nodedump_change_symbol(VALUE self, VALUE old_sym, VALUE new_sym)
{
  VALUE val_nodehash = rb_iv_get(self, "@nodehash");
  VALUE syms, key;
  // Check if node is position-independent
  // (i.e. with initialized NODEInfo structure that contains
  // relocations for symbols)
  if (val_nodehash == Qnil)
    rb_raise(rb_eArgError, "This node is not preparsed into Hash");
  // Check data types of the input arguments
  if (TYPE(old_sym) != T_STRING)
  {
    rb_raise(rb_eArgError, "old_sym argument must be a string");
  }
  if (TYPE(new_sym) != T_STRING)
  {
    rb_raise(rb_eArgError, "new_sym argument must be a string");
  }
  // Get the symbol table from the Hash
  syms = rb_hash_aref(val_nodehash, ID2SYM(rb_intern("symbols")));
  if (syms == Qnil)
    rb_raise(rb_eArgError, "Preparsed hash has no :symbols field");
  // Check if new_sym is present in the symbol table
  key = rb_funcall(syms, rb_intern("find_index"), 1, new_sym);
  if (key != Qnil)
  {
    rb_raise(rb_eArgError, "new_sym value must be absent in table of symbols");
  }
  // Change the symbol in the preparsed Hash
  key = rb_funcall(syms, rb_intern("find_index"), 1, old_sym);
  if (key == Qnil)
    return Qnil;
  // Go through the Array API instead of writing to the raw element pointer,
  // so that frozen/shared-storage checks and GC bookkeeping are honoured.
  rb_ary_store(syms, FIX2LONG(key), new_sym);
  return self;
}

#compileObject

Creates the RubyVM::InstructionSequence object from the node



1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
# File 'ext/node-marshal/nodedump.c', line 1525

/*
 * Builds a top-level instruction sequence from @node, using @nodename,
 * @filename and @filepath as its name and location.
 */
static VALUE m_nodedump_compile(VALUE self)
{
  VALUE filepath = rb_iv_get(self, "@filepath");
  VALUE filename = rb_iv_get(self, "@filename");
  VALUE nodename = rb_iv_get(self, "@nodename");
  NODE *root = RNODE(rb_iv_get(self, "@node"));
#ifdef WITH_RB_ISEQW_NEW
  /* For Ruby 2.3: the iseq is passed through rb_iseqw_new */
  return rb_iseqw_new(rb_iseq_new_top(root, nodename, filename, filepath, Qfalse));
#else
  /* For Pre-2.3: the result of rb_iseq_new_top is returned directly */
  return rb_iseq_new_top(root, nodename, filename, filepath, Qfalse);
#endif
}

#dump_treeObject

Transforms Ruby syntax tree (NODE) to the String using rb_parser_dump_tree function from node.c (see Ruby source code).



1653
1654
1655
1656
1657
# File 'ext/node-marshal/nodedump.c', line 1653

/* Returns the result of rb_parser_dump_tree applied to @node (second argument is 0). */
static VALUE m_nodedump_parser_dump_tree(VALUE self)
{
  VALUE val_node = rb_iv_get(self, "@node");
  return rb_parser_dump_tree(RNODE(val_node), 0);
}

#dump_tree_shortObject

Transforms Ruby syntax tree (NODE) to the String using custom function instead of rb_parser_dump_tree function.

See also #show_offsets, #show_offsets=



1668
1669
1670
1671
1672
1673
1674
1675
# File 'ext/node-marshal/nodedump.c', line 1668

/*
 * Renders @node into a fresh String with print_node.
 * Offsets are requested only when @show_offsets is exactly true.
 */
static VALUE m_nodedump_dump_tree_short(VALUE self)
{
  NODE *root = RNODE(rb_iv_get(self, "@node"));
  int with_offsets = 0;
  VALUE out;

  if (rb_iv_get(self, "@show_offsets") == Qtrue)
    with_offsets = 1;
  out = rb_str_new2(""); // print_node appends to this string
  print_node(out, root, 0, with_offsets);
  return out;
}

#filenameObject

Returns name of file that was used for node generation and will be used by YARV (or nil/<compiled> if a string of code was used)



1903
1904
1905
1906
# File 'ext/node-marshal/nodedump.c', line 1903

/*
 * Returns a copy of @filename, or nil when no file name is set.
 *
 * nil is returned directly instead of being sent #dup: the setter stores
 * nil, and NilClass#dup raises TypeError on Ruby versions before 2.4.
 */
static VALUE m_nodedump_filename(VALUE self)
{
  VALUE filename = rb_iv_get(self, "@filename");
  if (filename == Qnil)
    return Qnil;
  return rb_funcall(filename, rb_intern("dup"), 0);
}

#filename=(val) ⇒ Object

Sets name of file that was used for node generation and will be used by YARV (or nil/<compiled> if a string of code was used)



1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
# File 'ext/node-marshal/nodedump.c', line 1912

/*
 * Stores a copy of val in @filename; nil is stored as is.
 * Check_Type rejects any non-nil value that is not a String.
 * Returns self.
 */
static VALUE m_nodedump_set_filename(VALUE self, VALUE val)
{
  VALUE stored = Qnil;
  if (val != Qnil)
  {
    Check_Type(val, T_STRING);
    // Keep a private copy of the caller's string
    stored = rb_funcall(val, rb_intern("dup"), 0);
  }
  rb_iv_set(self, "@filename", stored);
  return self;
}

#filepathObject

Returns path of file that was used for node generation and will be used by YARV (or nil/<compiled> if a string of code was used)



1930
1931
1932
1933
# File 'ext/node-marshal/nodedump.c', line 1930

/*
 * Returns a copy of @filepath, or nil when no file path is set.
 *
 * nil is returned directly instead of being sent #dup: the setter stores
 * nil, and NilClass#dup raises TypeError on Ruby versions before 2.4.
 */
static VALUE m_nodedump_filepath(VALUE self)
{
  VALUE filepath = rb_iv_get(self, "@filepath");
  if (filepath == Qnil)
    return Qnil;
  return rb_funcall(filepath, rb_intern("dup"), 0);
}

#filepath=Object

Sets the path of file that was used for node generation and will be used by YARV (or nil/<compiled> if a string of code was used)



1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
# File 'ext/node-marshal/nodedump.c', line 1942

/*
 * Stores a copy of val in @filepath; nil is stored as is.
 * Check_Type rejects any non-nil value that is not a String.
 * Returns self.
 */
static VALUE m_nodedump_set_filepath(VALUE self, VALUE val)
{
  VALUE stored = Qnil;
  if (val != Qnil)
  {
    Check_Type(val, T_STRING);
    // Keep a private copy of the caller's string
    stored = rb_funcall(val, rb_intern("dup"), 0);
  }
  rb_iv_set(self, "@filepath", stored);
  return self;
}

#get_aliases_table(our_symbols) ⇒ Object

call-seq:

obj.get_aliases_table(our_symbols)

Returns a hash that has “old_sym_name”=>“new_sym_name”,… format. “new_sym_name” are generated automatically.

  • our_symbols – An array that contains the list of symbols (AS STRINGS,

NOT AS SYMBOLS) that can be renamed.



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/node-marshal.rb', line 155

# Builds a Hash of "old_name" => "new_name" pairs for the symbols returned
# by #get_safe_symbols(our_symbols).
#
# The alias keeps the kind of the name: "@@qN" for names starting with
# "@@", "@qN" for names starting with "@", "QN" for names starting with an
# upper-case ASCII letter and "qN" for names starting with a lower-case
# ASCII letter, where N is the 1-based position of the name in the list
# of safe symbols.
#
# Names that match none of these patterns (for example names starting with
# "_" or "$", or operators) are left out of the table: there is no alias
# for them, and a nil alias would make #replace_symbols fail.
def get_aliases_table(our_symbols)
  aliases = {}
  get_safe_symbols(our_symbols).each_with_index do |sym, idx|
    pos = idx + 1 # numbering counts skipped names too, as before
    new_name =
      case sym
      when /\A@@/     then "@@q#{pos}"
      when /\A@/      then "@q#{pos}"
      when /\A[A-Z]/  then "Q#{pos}"
      when /\A[a-z]/  then "q#{pos}"
      end
    aliases[sym] = new_name unless new_name.nil?
  end
  aliases
end

#get_safe_symbols(our_symbols) ⇒ Object

call-seq:

obj.get_safe_symbols(our_symbols)

Returns an array that contains strings with the names of symbols that are safe to change. It excludes symbols that are present in the table of literals (and their derivatives such as @x and x=). This is useful for handling attr_reader, attr_writer and other similar metaprogramming techniques.

  • our_symbols – symbols created during node creation (they must be found manually by the user by calling Symbol.all_symbols BEFORE and AFTER node creation).



137
138
139
140
141
142
143
144
145
# File 'lib/node-marshal.rb', line 137

# Returns the names from +our_symbols+ that do not clash with symbolic
# literals of the node. For every Symbol literal +x+ the names "x", "@x"
# and "x=" are treated as fixed and removed from the result.
def get_safe_symbols(our_symbols)
  to_hash # To initialize Hash with preparsed Ruby AST NODE
  literal_names = literals.grep(Symbol).map(&:to_s)
  ivar_names    = literal_names.map { |name| "@#{name}" }
  setter_names  = literal_names.map { |name| "#{name}=" }
  our_symbols - (literal_names + ivar_names + setter_names)
end

#inspectObject

Gives the information about the node



1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
# File 'ext/node-marshal/nodedump.c', line 1797

/*
 * Appends "<label><value>\n" to out. A nil value is printed as "nil";
 * any other value must be a String (StringValue converts or raises).
 * The bytes are appended through the String API, so the length of the
 * value is not limited by any fixed-size buffer.
 */
static void nodedump_inspect_append_name(VALUE out, const char *label, VALUE value)
{
  rb_str_cat2(out, label);
  if (value == Qnil)
  {
    rb_str_cat2(out, "nil");
  }
  else
  {
    StringValue(value);
    rb_str_cat(out, RSTRING_PTR(value), RSTRING_LEN(value));
  }
  rb_str_cat2(out, "\n");
}

/*
 * Returns a String with a human-readable summary of the node: object
 * address, @num_of_nodes (-1 when nil), @nodename, @filepath, @filename and
 * the element counts of the NODEInfo and NODEObjAddresses structures.
 *
 * When neither @nodeinfo nor @obj_addresses is set, m_nodedump_to_hash is
 * called first so that @nodeinfo becomes available.
 *
 * Only the fixed-width numeric sections are formatted with snprintf into a
 * local buffer; user-controlled names are appended to the Ruby String
 * directly, so long file names or paths cannot overflow anything.
 */
static VALUE m_nodedump_inspect(VALUE self)
{
  char buf[512];
  VALUE out, num_of_nodes;
  VALUE val_obj_addresses, val_nodeinfo;
  // Generic information about node
  num_of_nodes = rb_iv_get(self, "@num_of_nodes");
  snprintf(buf, sizeof(buf),
    "----- NodeMarshal:0x%"PRIxPTR"\n"
    "    num_of_nodes: %d\n",
    (uintptr_t) (self),
    (num_of_nodes == Qnil) ? -1 : FIX2INT(num_of_nodes)
    );
  out = rb_str_new2(buf);
  nodedump_inspect_append_name(out, "    nodename: ", rb_iv_get(self, "@nodename"));
  nodedump_inspect_append_name(out, "    filepath: ", rb_iv_get(self, "@filepath"));
  nodedump_inspect_append_name(out, "    filename: ", rb_iv_get(self, "@filename"));
  // Check if the information about node struct is available
  val_nodeinfo = rb_iv_get(self, "@nodeinfo");
  val_obj_addresses = rb_iv_get(self, "@obj_addresses");
  if (val_nodeinfo == Qnil && val_obj_addresses == Qnil)
  {
    m_nodedump_to_hash(self);
    val_nodeinfo = rb_iv_get(self, "@nodeinfo");
  }
  // Information about preparsed node
  // a) NODEInfo struct
  if (val_nodeinfo == Qnil)
  {
    rb_str_cat2(out, "    NODEInfo struct is empty\n");
  }
  else
  {
    NODEInfo *ninfo;
    Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
    snprintf(buf, sizeof(buf),
      "    NODEInfo struct:\n"
      "      syms hash len (Symbols):         %d\n"
      "      lits hash len (Literals):        %d\n"
      "      idtabs hash len (ID tables):     %d\n"
      "      gentries hash len (Global vars): %d\n"
      "      nodes hash len (Nodes):          %d\n"
#ifdef USE_RB_ARGS_INFO
      "      args hash len (args info):       %d\n"
#endif
      ,
      FIX2INT(rb_funcall(ninfo->syms.vals, rb_intern("length"), 0)),
      FIX2INT(rb_funcall(ninfo->lits.vals, rb_intern("length"), 0)),
      FIX2INT(rb_funcall(ninfo->idtabs.vals, rb_intern("length"), 0)),
      FIX2INT(rb_funcall(ninfo->gentries.vals, rb_intern("length"), 0)),
      FIX2INT(rb_funcall(ninfo->nodes.vals, rb_intern("length"), 0))
#ifdef USE_RB_ARGS_INFO
      ,
      FIX2INT(rb_funcall(ninfo->args.vals, rb_intern("length"), 0))
#endif
    );
    rb_str_cat2(out, buf);
  }
  // b) NODEObjAddresses struct
  if (val_obj_addresses == Qnil)
  {
    rb_str_cat2(out, "    NODEObjAddresses struct is empty\n");
  }
  else
  {
    NODEObjAddresses *objadr;
    Data_Get_Struct(val_obj_addresses, NODEObjAddresses, objadr);
    snprintf(buf, sizeof(buf),
      "    NODEObjAddresses struct:\n"
      "      syms_len (Num of symbols):      %d\n"
      "      lits_len (Num of literals):     %d\n"
      "      idtbls_len (Num of ID tables):  %d\n"
      "      gvars_len (Num of global vars): %d\n"
      "      nodes_len (Num of nodes):       %d\n"
#ifdef USE_RB_ARGS_INFO
      "      args_len: (Num of args info):   %d\n"
#endif
      , objadr->syms_len, objadr->lits_len,
      objadr->idtbls_len, objadr->gvars_len,
      objadr->nodes_len
#ifdef USE_RB_ARGS_INFO
      , objadr->args_len
#endif
    );
    rb_str_cat2(out, buf);
  }
  rb_str_cat2(out, "------------------\n");
  return out;
}

#literalsObject

Return array with the list of literals



1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
# File 'ext/node-marshal/nodedump.c', line 1463

/*
 * Returns val itself for Symbol, Float and Fixnum values and the result of
 * val.dup for every other type.
 */
static VALUE nodedump_literal_copy(VALUE val)
{
  switch (TYPE(val))
  {
  case T_SYMBOL:
  case T_FLOAT:
  case T_FIXNUM:
    return val;
  default:
    return rb_funcall(val, rb_intern("dup"), 0);
  }
}

/*
 * Returns a new Array with the literals of the node. Literals other than
 * Symbol, Float and Fixnum are duplicated before being returned.
 *
 * The list is taken from @obj_addresses when it is set, otherwise from
 * @nodeinfo. Raises ArgumentError when neither is initialized.
 */
static VALUE m_nodedump_literals(VALUE self)
{
  VALUE val_relocs, val_nodeinfo, result;
  int idx;

  // Variant 1: node loaded from file. It uses NODEObjAddresses struct
  // with the results of Ruby NODE structure parsing.
  val_relocs = rb_iv_get(self, "@obj_addresses");
  if (val_relocs != Qnil)
  {
    NODEObjAddresses *relocs;

    Data_Get_Struct(val_relocs, NODEObjAddresses, relocs);
    result = rb_ary_new();
    for (idx = 0; idx < relocs->lits_len; idx++)
    {
      rb_ary_push(result, nodedump_literal_copy(relocs->lits_adr[idx]));
    }
    return result;
  }

  // Variant 2: node saved to file (parsed from memory). It uses
  // NODEInfo struct that is initialized during node dump parsing.
  val_nodeinfo = rb_iv_get(self, "@nodeinfo");
  if (val_nodeinfo != Qnil)
  {
    NODEInfo *ninfo;
    VALUE *items;

    Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
    result = rb_funcall(ninfo->lits.vals, rb_intern("values"), 0);
    items = RARRAY_PTR(result);
    for (idx = 0; idx < RARRAY_LEN(result); idx++)
    {
      items[idx] = nodedump_literal_copy(items[idx]);
    }
    return result;
  }

  rb_raise(rb_eArgError, "Literals information not initialized. Run to_hash before reading.");
}

#nodeObject

Returns node object



1999
2000
2001
2002
# File 'ext/node-marshal/nodedump.c', line 1999

/* Reader for the @node instance variable (returned as is, without copying). */
static VALUE m_nodedump_node(VALUE self)
{
  VALUE val_node = rb_iv_get(self, "@node");
  return val_node;
}

#nodenameObject

Returns node name (usually <main>)



1894
1895
1896
1897
# File 'ext/node-marshal/nodedump.c', line 1894

/*
 * Returns a copy of @nodename, or nil when no node name is set.
 *
 * nil is returned directly instead of being sent #dup, because
 * NilClass#dup raises TypeError on Ruby versions before 2.4.
 */
static VALUE m_nodedump_nodename(VALUE self)
{
  VALUE nodename = rb_iv_get(self, "@nodename");
  if (nodename == Qnil)
    return Qnil;
  return rb_funcall(nodename, rb_intern("dup"), 0);
}

#rebuildObject

call-seq:

obj.rebuild

Rebuilds the node by converting it to the binary dump and further restoring of it from this dump. It doesn’t change the original node and returns rebuilt node.



205
206
207
# File 'lib/node-marshal.rb', line 205

# Dumps the node with #to_bin and loads the dump into a new NodeMarshal.
# The receiver is not replaced; the new object is returned.
def rebuild
  dump = to_bin
  NodeMarshal.new(:binmemory, dump)
end

#rename_ivars(*args) ⇒ Object

call-seq:

obj.rename_ivars


175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/node-marshal.rb', line 175

# Renames instance variables of the node to "@ivar1", "@ivar2", ...
#
# * args[0] (optional) -- list of instance variable names WITHOUT the
#   leading "@" that must keep their names.
#
# The number in a generated name is the 1-based position of the variable
# among all instance variables in the symbol table (excluded ones are
# counted too, so the numbering does not depend on the exclusion list).
#
# Excluded names are left out of the substitution table entirely. Mapping
# a name to itself would make change_symbol raise, because the "new" name
# is already present in the table of symbols.
#
# Returns self.
def rename_ivars(*args)
  excl_names = args.empty? ? [] : args[0]

  to_hash # To initialize @nodehash with the preparsed node
  ivars = @nodehash[:symbols].select {|x| (x =~ /@[^@]/) == 0}
  syms_subs = {}
  ivars.each_with_index do |name, idx|
    next if excl_names.include?(name[1..-1])
    syms_subs[name] = "@ivar#{idx + 1}"
  end
  replace_symbols(syms_subs)
  self
end

#replace_symbols(syms_subs) ⇒ Object

call-seq:

obj.replace_symbols(syms_subs)

Replaces some symbols inside parsed AST to user-defined aliases. It is designed to make code obfuscation easier. Be careful when using this ability: it is possible to break external libraries calls, operators overloading and some metaprogramming techniques.

  • syms_subs – Hash with the table of aliases. Keys are original names,

values are aliases. Keys and values MUST BE strings (not symbols!).



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/node-marshal.rb', line 106

# Applies the substitutions from +syms_subs+ (old name => new name, both
# Strings) to the preparsed node, one pair at a time, via change_symbol.
#
# Raises RuntimeError when +syms_subs+ is not a Hash and ArgumentError when
# two keys map to the same new name. Returns self.
def replace_symbols(syms_subs)
  # Input must be a Hash
  raise "symb_subs must be a hash" unless syms_subs.is_a?(Hash)
  # New names must not repeat
  new_names = syms_subs.values
  unless new_names.size == new_names.uniq.size
    raise ArgumentError, "values (new names) must be unique"
  end
  # TODO: also check uniqueness of the names after replacement
  to_hash # To initialize Hash with preparsed Ruby AST NODE
  # The replacement itself is done by the C part of NodeMarshal
  syms_subs.each_pair { |old_name, new_name| change_symbol(old_name, new_name) }
  self
end

#show_offsetsObject

Returns show_offsets property (used by NodeMarshal#dump_tree_short) It can be either true or false



1684
1685
1686
1687
# File 'ext/node-marshal/nodedump.c', line 1684

/* Reader for the @show_offsets instance variable. */
static VALUE m_nodedump_show_offsets(VALUE self)
{
  VALUE flag = rb_iv_get(self, "@show_offsets");
  return flag;
}

#show_offsets=Object

Sets show_offsets property (used by NodeMarshal#dump_tree_short) It can be either true or false



1696
1697
1698
1699
1700
1701
1702
1703
# File 'ext/node-marshal/nodedump.c', line 1696

/*
 * Writer for @show_offsets. Only true and false are accepted; any other
 * value raises ArgumentError. Returns the result of rb_iv_set.
 */
static VALUE m_nodedump_set_show_offsets(VALUE self, VALUE value)
{
  if (value == Qtrue || value == Qfalse)
  {
    return rb_iv_set(self, "@show_offsets", value);
  }
  rb_raise(rb_eArgError, "show_offsets property must be either true or false");
  return Qnil; // not reached: rb_raise does not return
}

#symbolsObject

Return array with the list of symbols



1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
# File 'ext/node-marshal/nodedump.c', line 1383

/*
 * Returns a new Array with the symbols of the node.
 *
 * When @obj_addresses is set, the IDs stored in it are converted with
 * ID2SYM. Otherwise the values kept in @nodeinfo are converted by calling
 * to_sym on each of them. Raises ArgumentError when neither is initialized.
 */
static VALUE m_nodedump_symbols(VALUE self)
{
  VALUE val_relocs, val_nodeinfo, result;
  int idx;

  val_relocs = rb_iv_get(self, "@obj_addresses");
  if (val_relocs == Qnil)
  {
    // Variant 2: node saved to file (parsed from memory)
    NODEInfo *ninfo;
    VALUE *items;

    val_nodeinfo = rb_iv_get(self, "@nodeinfo");
    if (val_nodeinfo == Qnil)
    {
      rb_raise(rb_eArgError, "Symbol information not initialized. Run to_hash before reading.");
    }
    Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
    result = rb_funcall(ninfo->syms.vals, rb_intern("values"), 0);
    items = RARRAY_PTR(result);
    for (idx = 0; idx < RARRAY_LEN(result); idx++)
    {
      items[idx] = rb_funcall(items[idx], rb_intern("to_sym"), 0);
    }
    return result;
  }
  else
  {
    // Variant 1: node loaded from file
    NODEObjAddresses *relocs;

    Data_Get_Struct(val_relocs, NODEObjAddresses, relocs);
    result = rb_ary_new();
    for (idx = 0; idx < relocs->syms_len; idx++)
    {
      rb_ary_push(result, ID2SYM(relocs->syms_adr[idx]));
    }
    return result;
  }
}

#to_binObject

Converts the NodeMarshal instance to a binary string that can be saved to a file and later used to load the node. The format of the binary dump depends on the platform (especially the pointer size) and on the Ruby version.



1787
1788
1789
1790
1791
1792
# File 'ext/node-marshal/nodedump.c', line 1787

/* Returns Marshal.dump applied to the hash produced by m_nodedump_to_hash. */
static VALUE m_nodedump_to_bin(VALUE self)
{
  ID id_dump = rb_intern("dump");
  VALUE nodehash = m_nodedump_to_hash(self);
  VALUE marshal_module = rb_const_get(rb_cObject, rb_intern("Marshal"));
  return rb_funcall(marshal_module, id_dump, 1, nodehash);
}

#to_compiled_rb(outfile, *args) ⇒ Object

call-seq:

obj.to_compiled_rb(outfile, opts)

Transforms node to the Ruby file

  • outfile – name of the output file

  • opts – Hash with options (:compress, :so_path). :compress can be true or false; :so_path is a text string with the command that loads nodemarshal.so (default is require_relative '../ext/node-marshal/nodemarshal.so')

See also NodeMarshal::compile_rb_file



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/node-marshal.rb', line 37

# Generates a Ruby script that carries the dumped node together with the
# code that loads and runs it.
#
# * outfile -- name of the output file, or nil to skip writing.
# * args[0] (optional) -- Hash with options:
#   * :compress -- when true (default) the dump is deflated with Zlib and
#     Base85-encoded; when false the result of #to_text is embedded.
#   * :so_path -- line of Ruby code that loads nodemarshal.so.
#
# Returns the text of the generated script. Raises RuntimeError when
# compression is requested but Zlib is not loaded.
def to_compiled_rb(outfile, *args)
  compress = true
  so_path = "require_relative '../ext/node-marshal/nodemarshal.so'"
  if args.length > 0
    opts = args[0]
    compress = opts[:compress] if opts.has_key?(:compress)
    so_path = opts[:so_path] if opts.has_key?(:so_path)
  end
  # Compression
  if compress
    if !defined?(Zlib)
      raise "Compression is not supported: Zlib is absent"
    end
    zlib_include = "require 'zlib'"
    data_txt = NodeMarshal.base85r_encode(Zlib::deflate(self.to_bin))
    data_bin = "Zlib::inflate(NodeMarshal.base85r_decode(data_txt))"
  else
    zlib_include = "# No compression"
    data_txt = self.to_text
    data_bin = "NodeMarshal.base85r_decode(data_txt)"
  end
  # Document header and loader. The values computed above must really be
  # interpolated here: with an escaped "\#{...}" the generated script would
  # contain the placeholders literally and would not load anything.
  txt = [
    "# Ruby compressed source code",
    "# RUBY_PLATFORM: #{RUBY_PLATFORM}",
    "# RUBY_VERSION: #{RUBY_VERSION}",
    "#{zlib_include}",
    "#{so_path}",
    "data_txt = <<DATABLOCK",
    "#{data_txt}",
    "DATABLOCK",
    "data_bin = #{data_bin}",
    "node = NodeMarshal.new(:binmemory, data_bin)",
    "node.filename = __FILE__",
    "node.filepath = File.expand_path(node.filename)",
    "node.compile.eval",
    "" # makes the script end with a newline
  ].join("\n")
  # Write the script if an output file was requested
  if outfile != nil
    File.open(outfile, 'w') {|fp| fp << txt}
  end
  return txt
end

#to_hashObject

Converts the NodeMarshal instance to a hash that contains complete information about the node and does not depend on the memory addresses of data structures. The format of the hash depends on the platform (especially the pointer size) and on the Ruby version.

Format of the hash

Part 1: Signatures

  • MAGIC – NODEMARSHAL11

  • RUBY_PLATFORM – saved RUBY_PLATFORM constant value

  • RUBY_VERSION – saved RUBY_VERSION constant value

Part 2: Program loadable elements.

All loadable elements are arrays. Index of the array element means its identifier that is used in the node tree.

  • literals – program literals (strings, ranges etc.)

  • symbols – program symbols (values have either String or Fixnum data type; numbers are used for symbols that cannot be represented as strings)

  • global_entries – global variables information

  • id_tables – array of arrays. Each array contains symbols IDs

  • args – information about code block argument(s)

Part 3: Nodes information

  • nodes – string that contains binary encoded information about the nodes

  • num_of_nodes – number of nodes in the nodes field

  • nodename – name of the node (usually “<main>”)

  • filename – name (without path) of .rb file used for the node generation

  • filepath – name (with full path) of .rb file used for the node generation



1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
# File 'ext/node-marshal/nodedump.c', line 1743

/*
 * Does the actual work of m_nodedump_to_hash. It is expected to run with
 * the garbage collector disabled (see m_nodedump_to_hash).
 *
 * On the first call it creates the NODEInfo envelope, stores it in
 * @nodeinfo, counts the nodes (stored in @nodeinfo_num_of_nodes), builds
 * the hash with NODEInfo_toHash, adds :num_of_nodes, :nodename, :filename
 * and :filepath to it and caches it in @nodehash. Later calls return the
 * cached @nodehash.
 */
static VALUE nodedump_to_hash_body(VALUE self)
{
  NODE *node = RNODE(rb_iv_get(self, "@node"));
  NODEInfo *info;
  VALUE ans, num, val_info;
  // Convert the node to the form with relocs (i.e. the information about node)
  // if such form is not present
  val_info = rb_iv_get(self, "@nodeinfo");
  if (val_info == Qnil)
  {
    val_info = Data_Make_Struct(cNodeInfo, NODEInfo,
      NODEInfo_mark, NODEInfo_free, info); // This data envelope cannot exist without NODE
    NODEInfo_init(info);
    rb_iv_set(self, "@nodeinfo", val_info);
    num = INT2FIX(count_num_of_nodes(node, node, info));
    rb_iv_set(self, "@nodeinfo_num_of_nodes", num);
    // Convert node to NODEInfo structure
    ans = NODEInfo_toHash(info);
    rb_hash_aset(ans, ID2SYM(rb_intern("num_of_nodes")), num);
    rb_hash_aset(ans, ID2SYM(rb_intern("nodename")), rb_iv_get(self, "@nodename"));
    rb_hash_aset(ans, ID2SYM(rb_intern("filename")), rb_iv_get(self, "@filename"));
    rb_hash_aset(ans, ID2SYM(rb_intern("filepath")), rb_iv_get(self, "@filepath"));
    rb_iv_set(self, "@nodehash", ans);
  }
  else
  {
    ans = rb_iv_get(self, "@nodehash");
  }
  return ans;
}

/*
 * Cleanup handler for rb_ensure: turns the garbage collector back on, but
 * only when it was enabled before m_nodedump_to_hash disabled it.
 */
static VALUE nodedump_to_hash_restore_gc(VALUE gc_was_disabled)
{
  if (!RTEST(gc_was_disabled))
    rb_gc_enable();
  return Qnil;
}

/*
 * Returns the position-independent Hash representation of the node
 * (built on the first call, cached in @nodehash afterwards).
 *
 * The garbage collector is disabled while the node is being converted.
 * The previous GC state is restored afterwards, also when the conversion
 * raises an exception, and GC is not switched on if the caller had
 * disabled it before the call.
 */
static VALUE m_nodedump_to_hash(VALUE self)
{
  // rb_gc_disable reports whether GC had already been disabled
  VALUE gc_was_disabled = rb_gc_disable();
  return rb_ensure(nodedump_to_hash_body, self,
    nodedump_to_hash_restore_gc, gc_was_disabled);
}

#to_textObject

Converts the NodeMarshal instance to a text string (modified Base85 encoding) that can be saved to a file and later used to load the node. The format of the underlying binary dump depends on the platform (especially the pointer size) and on the Ruby version.



1990
1991
1992
1993
1994
# File 'ext/node-marshal/nodedump.c', line 1990

/* Returns the binary dump from m_nodedump_to_bin passed through base85r_encode. */
static VALUE m_nodedump_to_text(VALUE self)
{
  return base85r_encode(m_nodedump_to_bin(self));
}