Class: NodeMarshal

Inherits:
Object
  • Object
show all
Defined in:
lib/node-marshal.rb,
ext/node-marshal/nodedump.c

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#new(:srcfile, filename) ⇒ Object #new(:binfile, filename) ⇒ Object #new(:srcmemory, srcstr) ⇒ Object #new(:binmemory, binstr) ⇒ Object

Creates a NodeMarshal instance either from source code or from a dumped syntax tree (NODEs), i.e. preparsed and packed source code. The created object can be used either to execute the code or to save it in the preparsed form (useful for code obfuscation/protection).



1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
# File 'ext/node-marshal/nodedump.c', line 1425

/*
 * NodeMarshal#initialize(source, info)
 *
 * Dispatches on the +source+ symbol to the matching loader:
 *   :srcfile   -> m_nodedump_from_source(self, info)
 *   :srcmemory -> m_nodedump_from_string(self, info)
 *   :binmemory -> m_nodedump_from_memory(self, info)
 *   :binfile   -> reads +info+ with File.binread and passes the
 *                 resulting string to m_nodedump_from_memory
 *
 * Raises TypeError (via Check_Type) when +source+ is not a Symbol and
 * ArgumentError when the symbol is not one of the four listed above.
 */
static VALUE m_nodedump_init(VALUE self, VALUE source, VALUE info)
{
	ID id_usr;
	Check_Type(source, T_SYMBOL);
	id_usr = SYM2ID(source);
	if (id_usr == rb_intern("srcfile"))
	{
		return m_nodedump_from_source(self, info);
	}
	if (id_usr == rb_intern("srcmemory"))
	{
		return m_nodedump_from_string(self, info);
	}
	if (id_usr == rb_intern("binmemory"))
	{
		return m_nodedump_from_memory(self, info);
	}
	if (id_usr == rb_intern("binfile"))
	{
		// Load the whole dump into memory and reuse the :binmemory path
		VALUE cFile = rb_const_get(rb_cObject, rb_intern("File"));
		VALUE bin = rb_funcall(cFile, rb_intern("binread"), 1, info);
		return m_nodedump_from_memory(self, bin);
	}
	rb_raise(rb_eArgError, "Invalid source type (it must be :srcfile, :srcmemory, :binmemory or :binfile)");
	return Qnil; // Not reached: rb_raise does not return
}

Class Method Details

.base85r_decode(input) ⇒ Object

Decode ASCII string in the modified BASE85 format to the binary string (useful for obfuscation of .rb source files)



1734
1735
1736
1737
# File 'ext/node-marshal/nodedump.c', line 1734

/*
 * NodeMarshal.base85r_decode(input)
 *
 * Ruby-visible binding: hands +input+ to base85r_decode() and returns
 * whatever that function returns. The receiver (+obj+) is not used.
 */
static VALUE m_base85r_decode(VALUE obj, VALUE input)
{
	VALUE decoded = base85r_decode(input);
	return decoded;
}

.base85r_encode(input) ⇒ Object

Encode arbitrary binary string to the ASCII string using modified version of BASE85 (useful for obfuscation of .rb source files)



1721
1722
1723
1724
# File 'ext/node-marshal/nodedump.c', line 1721

/*
 * NodeMarshal.base85r_encode(input)
 *
 * Ruby-visible binding: hands +input+ to base85r_encode() and returns
 * whatever that function returns. The receiver (+obj+) is not used.
 */
static VALUE m_base85r_encode(VALUE obj, VALUE input)
{
	VALUE encoded = base85r_encode(input);
	return encoded;
}

.compile_rb_file(outfile, inpfile, *args) ⇒ Object



59
60
61
62
63
# File 'lib/node-marshal.rb', line 59

# Builds a NodeMarshal from the Ruby source file +inpfile+ (:srcfile mode)
# and forwards +outfile+ together with any extra arguments to
# #to_compiled_rb. Always returns true.
def self.compile_rb_file(outfile, inpfile, *args)
	NodeMarshal.new(:srcfile, inpfile).to_compiled_rb(outfile, *args)
	true
end

Instance Method Details

#compileObject

Creates the RubyVM::InstructionSequence object from the node



1351
1352
1353
1354
1355
1356
1357
1358
# File 'ext/node-marshal/nodedump.c', line 1351

/*
 * NodeMarshal#compile
 *
 * Reads @node, @nodename, @filename and @filepath from the receiver and
 * passes them to rb_iseq_new_top (with Qfalse as the last argument),
 * returning its result.
 */
static VALUE m_nodedump_compile(VALUE self)
{
	NODE *root;
	VALUE name, file, path;

	root = RNODE(rb_iv_get(self, "@node"));
	name = rb_iv_get(self, "@nodename");
	file = rb_iv_get(self, "@filename");
	path = rb_iv_get(self, "@filepath");
	return rb_iseq_new_top(root, name, file, path, Qfalse);
}

#dump_treeObject

Transforms Ruby syntax tree (NODE) to the text string using rb_parser_dump_tree function from node.c (see Ruby source code).



1462
1463
1464
1465
1466
# File 'ext/node-marshal/nodedump.c', line 1462

/*
 * NodeMarshal#dump_tree
 *
 * Passes the node stored in @node to rb_parser_dump_tree (second
 * argument 0) and returns the result.
 */
static VALUE m_nodedump_parser_dump_tree(VALUE self)
{
	VALUE holder = rb_iv_get(self, "@node");
	return rb_parser_dump_tree(RNODE(holder), 0);
}

#dump_tree_shortObject

Prints the node tree in the short variant



1471
1472
1473
1474
1475
1476
1477
# File 'ext/node-marshal/nodedump.c', line 1471

/*
 * NodeMarshal#dump_tree_short
 *
 * Creates an empty Ruby string, lets print_node() fill it starting from
 * the node stored in @node (third argument 0), and returns that string.
 */
static VALUE m_nodedump_dump_tree_short(VALUE self)
{
	VALUE out;
	NODE *root;

	out = rb_str_new2("");
	root = RNODE(rb_iv_get(self, "@node"));
	print_node(out, root, 0);
	return out;
}

#filenameObject

Returns name of file that was used for node generation and will be used by YARV (or nil/<compiled> if a string of code was used)



1660
1661
1662
1663
# File 'ext/node-marshal/nodedump.c', line 1660

/*
 * NodeMarshal#filename
 *
 * Returns a copy (dup) of @filename, or nil when @filename is nil.
 * The nil case is handled explicitly because the setter stores nil
 * and calling dup on nil raises TypeError on Ruby versions before 2.4.
 */
static VALUE m_nodedump_filename(VALUE self)
{
	VALUE filename = rb_iv_get(self, "@filename");
	if (NIL_P(filename))
	{
		return Qnil;
	}
	return rb_funcall(filename, rb_intern("dup"), 0);
}

#filename=(val) ⇒ Object

Sets name of file that was used for node generation and will be used by YARV (or nil/<compiled> if a string of code was used)



1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
# File 'ext/node-marshal/nodedump.c', line 1669

/*
 * NodeMarshal#filename=(val)
 *
 * Stores a copy (dup) of +val+ in @filename, or nil when +val+ is nil.
 * A non-nil +val+ must be a String (Check_Type raises TypeError
 * otherwise). Returns the receiver.
 */
static VALUE m_nodedump_set_filename(VALUE self, VALUE val)
{
	VALUE stored = Qnil;

	if (!NIL_P(val))
	{
		Check_Type(val, T_STRING);
		stored = rb_funcall(val, rb_intern("dup"), 0);
	}
	rb_iv_set(self, "@filename", stored);
	return self;
}

#filepathObject

Returns path of file that was used for node generation and will be used by YARV (or nil/<compiled> if a string of code was used)



1687
1688
1689
1690
# File 'ext/node-marshal/nodedump.c', line 1687

/*
 * NodeMarshal#filepath
 *
 * Returns a copy (dup) of @filepath, or nil when @filepath is nil.
 * The nil case is handled explicitly because the setter stores nil
 * and calling dup on nil raises TypeError on Ruby versions before 2.4.
 */
static VALUE m_nodedump_filepath(VALUE self)
{
	VALUE filepath = rb_iv_get(self, "@filepath");
	if (NIL_P(filepath))
	{
		return Qnil;
	}
	return rb_funcall(filepath, rb_intern("dup"), 0);
}

#filepath=(val) ⇒ Object

Sets the path of file that was used for node generation and will be used by YARV (or nil/<compiled> if a string of code was used)



1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
# File 'ext/node-marshal/nodedump.c', line 1699

/*
 * NodeMarshal#filepath=(val)
 *
 * Stores a copy (dup) of +val+ in @filepath, or nil when +val+ is nil.
 * A non-nil +val+ must be a String (Check_Type raises TypeError
 * otherwise). Returns the receiver.
 */
static VALUE m_nodedump_set_filepath(VALUE self, VALUE val)
{
	VALUE stored = Qnil;

	if (!NIL_P(val))
	{
		Check_Type(val, T_STRING);
		stored = rb_funcall(val, rb_intern("dup"), 0);
	}
	rb_iv_set(self, "@filepath", stored);
	return self;
}

#inspectObject

Gives the information about the node



1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
# File 'ext/node-marshal/nodedump.c', line 1554

/*
 * NodeMarshal#inspect
 *
 * Returns a multi-line String describing the receiver:
 *   - its address, @num_of_nodes, @nodename, @filepath and @filename
 *     ("nil" / -1 are printed for unset values);
 *   - the sizes of the NODEInfo tables stored in @nodeinfo;
 *   - the counters of the NODEObjAddresses struct stored in @obj_addresses.
 *
 * When both @nodeinfo and @obj_addresses are nil, m_nodedump_to_hash is
 * called first so that @nodeinfo gets populated.
 *
 * The text is accumulated in a Ruby String (rb_sprintf) instead of
 * fixed-size static char buffers, so names and paths of any length are
 * safe and the function keeps no state between calls.
 */
static VALUE m_nodedump_inspect(VALUE self)
{
	VALUE out;
	VALUE num_of_nodes, nodename, filepath, filename;
	VALUE val_obj_addresses, val_nodeinfo;
	// Get generic information about node
	num_of_nodes = rb_iv_get(self, "@num_of_nodes");
	nodename = rb_iv_get(self, "@nodename");
	filepath = rb_iv_get(self, "@filepath");
	filename = rb_iv_get(self, "@filename");
	// Generate string with generic information about node
	out = rb_sprintf(
		"----- NodeMarshal:0x%"PRIxPTR"\n"
		"    num_of_nodes: %d\n    nodename: %s\n    filepath: %s\n    filename: %s\n",
		(uintptr_t) self,
		(num_of_nodes == Qnil) ? -1 : FIX2INT(num_of_nodes),
		(nodename == Qnil) ? "nil" : RSTRING_PTR(nodename),
		(filepath == Qnil) ? "nil" : RSTRING_PTR(filepath),
		(filename == Qnil) ? "nil" : RSTRING_PTR(filename)
		);
	// Check if the information about node struct is available
	val_nodeinfo = rb_iv_get(self, "@nodeinfo");
	val_obj_addresses = rb_iv_get(self, "@obj_addresses");
	if (val_nodeinfo == Qnil && val_obj_addresses == Qnil)
	{
		// Neither representation exists yet: build NODEInfo on demand
		m_nodedump_to_hash(self);
		val_nodeinfo = rb_iv_get(self, "@nodeinfo");
	}
	// Information about preparsed node
	// a) NODEInfo struct
	if (val_nodeinfo == Qnil)
	{
		rb_str_cat2(out, "    NODEInfo struct is empty\n");
	}
	else
	{
		NODEInfo *ninfo;
		ID id_length = rb_intern("length");
		Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
		rb_str_append(out, rb_sprintf(
			"    NODEInfo struct:\n"
			"      syms hash len (Symbols):         %d\n"
			"      lits hash len (Literals):        %d\n"
			"      idtabs hash len (ID tables):     %d\n"
			"      gentries hash len (Global vars): %d\n"
			"      nodes hash len (Nodes):          %d\n"
#ifdef USE_RB_ARGS_INFO
			"      args hash len (args info):       %d\n"
#endif
			,
			FIX2INT(rb_funcall(ninfo->syms.vals, id_length, 0)),
			FIX2INT(rb_funcall(ninfo->lits.vals, id_length, 0)),
			FIX2INT(rb_funcall(ninfo->idtabs.vals, id_length, 0)),
			FIX2INT(rb_funcall(ninfo->gentries.vals, id_length, 0)),
			FIX2INT(rb_funcall(ninfo->nodes.vals, id_length, 0))
#ifdef USE_RB_ARGS_INFO
			,
			FIX2INT(rb_funcall(ninfo->args.vals, id_length, 0))
#endif
		));
	}
	// b) NODEObjAddresses struct
	if (val_obj_addresses == Qnil)
	{
		rb_str_cat2(out, "    NODEObjAddresses struct is empty\n");
	}
	else
	{
		NODEObjAddresses *objadr;
		Data_Get_Struct(val_obj_addresses, NODEObjAddresses, objadr);
		rb_str_append(out, rb_sprintf(
			"    NODEObjAddresses struct:\n"
			"      syms_len (Num of symbols):      %d\n"
			"      lits_len (Num of literals):     %d\n"
			"      idtbls_len (Num of ID tables):  %d\n"
			"      gvars_len (Num of global vars): %d\n"
			"      nodes_len (Num of nodes):       %d\n"
#ifdef USE_RB_ARGS_INFO
			"      args_len: (Num of args info):   %d\n"
#endif
			, objadr->syms_len, objadr->lits_len,
			objadr->idtbls_len, objadr->gvars_len,
			objadr->nodes_len
#ifdef USE_RB_ARGS_INFO
			, objadr->args_len
#endif
		));
	}
	rb_str_cat2(out, "------------------\n");
	// Output string
	return out;
}

#literalsObject

Return array with the list of literals



1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
# File 'ext/node-marshal/nodedump.c', line 1307

/*
 * Returns a value that is safe to hand out for the literal +lit+:
 * Symbols, Floats and Fixnums are returned as they are (calling dup on
 * them raises TypeError on Ruby versions before 2.4); every other object
 * is copied with dup.
 */
static VALUE nodedump_copy_literal(VALUE lit)
{
	int t = TYPE(lit);
	if (t == T_SYMBOL || t == T_FLOAT || t == T_FIXNUM)
	{
		return lit;
	}
	return rb_funcall(lit, rb_intern("dup"), 0);
}

/*
 * NodeMarshal#literals
 *
 * Returns an Array with the literals of the node.
 *   Variant 1: @obj_addresses is set -> the literals are taken from the
 *              lits_adr table of the NODEObjAddresses struct.
 *   Variant 2: @nodeinfo is set -> the literals are the values of the
 *              lits table of the NODEInfo struct.
 * Both variants copy the literals through nodedump_copy_literal, so they
 * treat Symbol/Float/Fixnum literals the same way.
 *
 * Raises ArgumentError when neither instance variable is set.
 */
static VALUE m_nodedump_literals(VALUE self)
{
	VALUE val_relocs, val_nodeinfo, lits;
	// Variant 1: node loaded from file
	val_relocs = rb_iv_get(self, "@obj_addresses");
	if (val_relocs != Qnil)
	{
		int i;
		NODEObjAddresses *relocs;

		Data_Get_Struct(val_relocs, NODEObjAddresses, relocs);
		lits = rb_ary_new();
		for (i = 0; i < relocs->lits_len; i++)
		{
			rb_ary_push(lits, nodedump_copy_literal(relocs->lits_adr[i]));
		}
		return lits;
	}
	// Variant 2: node saved to file (parsed from memory)
	val_nodeinfo = rb_iv_get(self, "@nodeinfo");
	if (val_nodeinfo != Qnil)
	{
		long idx;
		NODEInfo *ninfo;

		Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
		lits = rb_funcall(ninfo->lits.vals, rb_intern("values"), 0);
		// Elements are accessed through the array API: no raw pointer
		// into the array is kept while Ruby methods are being called
		for (idx = 0; idx < RARRAY_LEN(lits); idx++)
		{
			rb_ary_store(lits, idx, nodedump_copy_literal(rb_ary_entry(lits, idx)));
		}
		return lits;
	}
	rb_raise(rb_eArgError, "Literals information not initialized. Run to_hash before reading.");
	return Qnil; // Not reached: rb_raise does not return
}

#nodeObject

Returns node object



1756
1757
1758
1759
# File 'ext/node-marshal/nodedump.c', line 1756

static VALUE m_nodedump_node(VALUE self)
{
	return rb_iv_get(self, "@node");
}

#nodenameObject

Returns node name (usually <main>)



1651
1652
1653
1654
# File 'ext/node-marshal/nodedump.c', line 1651

/*
 * NodeMarshal#nodename
 *
 * Returns the result of calling dup on the value stored in @nodename.
 */
static VALUE m_nodedump_nodename(VALUE self)
{
	VALUE stored_name = rb_iv_get(self, "@nodename");
	return rb_funcall(stored_name, rb_intern("dup"), 0);
}

#symbolsObject

Return array with the list of symbols



1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
# File 'ext/node-marshal/nodedump.c', line 1271

/*
 * NodeMarshal#symbols
 *
 * Returns an Array with the symbols of the node.
 *   Variant 1: @obj_addresses is set -> every ID in the syms_adr table of
 *              the NODEObjAddresses struct is converted with ID2SYM.
 *   Variant 2: @nodeinfo is set -> the values of the syms table of the
 *              NODEInfo struct are converted with to_sym.
 *
 * Raises ArgumentError when neither instance variable is set.
 */
static VALUE m_nodedump_symbols(VALUE self)
{
	int k;
	VALUE addresses, nodeinfo, result;

	// Variant 1: node loaded from file
	addresses = rb_iv_get(self, "@obj_addresses");
	if (!NIL_P(addresses))
	{
		NODEObjAddresses *adr;
		Data_Get_Struct(addresses, NODEObjAddresses, adr);
		result = rb_ary_new();
		k = 0;
		while (k < adr->syms_len)
		{
			rb_ary_push(result, ID2SYM(adr->syms_adr[k]));
			k++;
		}
		return result;
	}
	// Variant 2: node saved to file (parsed from memory)
	nodeinfo = rb_iv_get(self, "@nodeinfo");
	if (!NIL_P(nodeinfo))
	{
		NODEInfo *tables;
		VALUE *items;
		Data_Get_Struct(nodeinfo, NODEInfo, tables);
		result = rb_funcall(tables->syms.vals, rb_intern("values"), 0);
		items = RARRAY_PTR(result);
		k = 0;
		while (k < RARRAY_LEN(result))
		{
			// Replace every element in place by its Symbol form
			items[k] = rb_funcall(items[k], rb_intern("to_sym"), 0);
			k++;
		}
		return result;
	}
	rb_raise(rb_eArgError, "Symbol information not initialized. Run to_hash before reading.");
}

#to_binObject

Converts the NodeMarshal instance to a binary string that can be saved to a file and later used to load the node from that file. The format of the resulting binary dump depends on the platform (especially the pointer size) and on the Ruby version.



1544
1545
1546
1547
1548
1549
# File 'ext/node-marshal/nodedump.c', line 1544

/*
 * NodeMarshal#to_bin
 *
 * Builds the hash representation with m_nodedump_to_hash and returns the
 * result of Marshal.dump applied to it.
 */
static VALUE m_nodedump_to_bin(VALUE self)
{
	VALUE dump_hash, marshal_mod;

	dump_hash = m_nodedump_to_hash(self);
	marshal_mod = rb_const_get(rb_cObject, rb_intern("Marshal"));
	return rb_funcall(marshal_mod, rb_intern("dump"), 1, dump_hash);
}

#to_compiled_rb(outfile, *args) ⇒ Object

call-seq:

obj.to_compiled_rb(outfile, opts)

Transforms node to the Ruby file

  • outfile – name of the output file

  • opts – Hash with options (:compress, :so_path). :compress can be true or false; :so_path is a text string with the command for nodemarshal.so inclusion (default is require_relative ‘../ext/node-marshal/nodemarshal.so’)



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/node-marshal.rb', line 14

# Generates the text of a Ruby loader script for this node and optionally
# writes it to a file.
#
# outfile:: name of the output file; when nil nothing is written
# args::    optional; the first element is an options Hash that may contain
#           :compress (deflate the binary dump with Zlib before encoding it)
#           and :so_path (the line of code that loads nodemarshal.so)
#
# Returns the generated script text.
def to_compiled_rb(outfile, *args)
	compress = false
	so_path = "require_relative '../ext/node-marshal/nodemarshal.so'"
	unless args.empty?
		opts = args.first
		compress = opts[:compress] if opts.has_key?(:compress)
		so_path = opts[:so_path] if opts.has_key?(:so_path)
	end
	# Choose the payload and the expression that the loader uses to decode it
	if compress
		zlib_line = "require 'zlib'"
		payload = NodeMarshal.base85r_encode(Zlib::deflate(self.to_bin))
		decoder = "Zlib::inflate(NodeMarshal.base85r_decode(data_txt))"
	else
		zlib_line = "# No compression"
		payload = self.to_text
		decoder = "NodeMarshal.base85r_decode(data_txt)"
	end
	# Text of the loader script
	script = <<EOS
# Ruby compressed source code
# RUBY_PLATFORM: #{RUBY_PLATFORM}
# RUBY_VERSION: #{RUBY_VERSION}
#{zlib_line}
#{so_path}
data_txt = <<DATABLOCK
#{payload}
DATABLOCK
data_bin = #{decoder}
node = NodeMarshal.new(:binmemory, data_bin)
node.filename = __FILE__
node.filepath = File.expand_path(node.filename)
node.compile.eval
EOS
	# Save the script only when an output file name was given
	unless outfile.nil?
		File.open(outfile, 'w') { |fp| fp << script }
	end
	script
end

#to_hashObject

Converts the NodeMarshal instance to a hash that contains complete information about the node and does not depend on the memory addresses of the underlying data structures. The format of the resulting hash depends on the platform (especially the pointer size) and on the Ruby version.

Format of the hash

Part 1: Signatures

  • MAGIC – NODEMARSHAL10

  • RUBY_PLATFORM – saved RUBY_PLATFORM constant value

  • RUBY_VERSION – saved RUBY_VERSION constant value

Part 2: Program loadable elements.

All loadable elements are arrays. Index of the array element means its identifier that is used in the node tree.

  • literals – program literals (strings, ranges etc.)

  • symbols – program symbols

  • global_entries – global variables information

  • id_tables – array of arrays. Each array contains symbols IDs

  • args – information about code block argument(s)

Part 3: Nodes information

  • nodes – string that contains binary encoded information about the nodes



1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
# File 'ext/node-marshal/nodedump.c', line 1511

/*
 * Body of m_nodedump_to_hash; runs while the garbage collector is
 * disabled. Creates a NODEInfo envelope, stores it in @nodeinfo, fills it
 * from the node in @node and returns the resulting hash extended with
 * :num_of_nodes, :nodename, :filename and :filepath.
 */
static VALUE nodedump_to_hash_build(VALUE self)
{
	NODE *node = RNODE(rb_iv_get(self, "@node"));
	NODEInfo *info;
	VALUE ans, num, val_info;
	// Allocate memory for the information about node
	val_info = Data_Make_Struct(cNodeInfo, NODEInfo,
		NODEInfo_mark, NODEInfo_free, info); // This data envelope cannot exist without NODE
	NODEInfo_init(info);
	rb_iv_set(self, "@nodeinfo", val_info);
	// Convert node to NODEInfo structure
	num = INT2FIX(count_num_of_nodes(node, node, info));
	ans = NODEInfo_toHash(info);
	rb_hash_aset(ans, ID2SYM(rb_intern("num_of_nodes")), num);
	rb_hash_aset(ans, ID2SYM(rb_intern("nodename")), rb_iv_get(self, "@nodename"));
	rb_hash_aset(ans, ID2SYM(rb_intern("filename")), rb_iv_get(self, "@filename"));
	rb_hash_aset(ans, ID2SYM(rb_intern("filepath")), rb_iv_get(self, "@filepath"));
	return ans;
}

/*
 * Ensure handler of m_nodedump_to_hash: re-enables the garbage collector
 * unless it had already been disabled before the dump started.
 */
static VALUE nodedump_to_hash_restore_gc(VALUE gc_was_disabled)
{
	if (!RTEST(gc_was_disabled))
	{
		rb_gc_enable();
	}
	return Qnil;
}

/*
 * NodeMarshal#to_hash
 *
 * Converts the node to a hash (see nodedump_to_hash_build). The garbage
 * collector is disabled for the duration of the conversion (important for
 * dumping). rb_ensure guarantees that its previous state is restored even
 * when the conversion raises an exception.
 */
static VALUE m_nodedump_to_hash(VALUE self)
{
	// rb_gc_disable returns true when the GC was already disabled
	VALUE gc_was_disabled = rb_gc_disable();
	return rb_ensure(nodedump_to_hash_build, self,
		nodedump_to_hash_restore_gc, gc_was_disabled);
}

#to_textObject

Converts the NodeMarshal instance to a text string (modified Base85 encoding) that can be saved to a file and later used to load the node from that file. The format of the resulting dump depends on the platform (especially the pointer size) and on the Ruby version.



1747
1748
1749
1750
1751
# File 'ext/node-marshal/nodedump.c', line 1747

/*
 * NodeMarshal#to_text
 *
 * Produces the binary dump with m_nodedump_to_bin and returns it encoded
 * by base85r_encode.
 */
static VALUE m_nodedump_to_text(VALUE self)
{
	VALUE binary_dump;

	binary_dump = m_nodedump_to_bin(self);
	return base85r_encode(binary_dump);
}