Module: BPList
- Defined in:
- lib/imsg-grep/apple/bplist.rb
Constant Summary collapse
- APPLE_EPOCH =
Seconds between the Unix epoch (1970-01-01 00:00:00 UTC) and Apple’s reference date (2001-01-01 00:00:00 UTC)
978307200
Class Method Summary collapse
-
.get_count(data, pos, low) ⇒ Object
Get count/length (handles 0xF continuation).
- .parse(data) ⇒ Object
-
.read_int(data, pos, size) ⇒ Object
Read big-endian integer from data at position.
Class Method Details
.get_count(data, pos, low) ⇒ Object
Get count/length (handles 0xF continuation)
29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/imsg-grep/apple/bplist.rb', line 29

# Get count/length (handles 0xF continuation).
#
# When +low+ is anything other than 0xF it is the count itself and the payload
# starts right after the marker byte. When +low+ is 0xF, the byte after the
# marker must be an integer marker (high nibble 0x1) whose low nibble gives
# the power-of-two width of a big-endian count that follows it.
#
# @param data [String] raw bytes containing the object
# @param pos [Integer] byte offset of the object's marker byte
# @param low [Integer] low nibble of the marker byte
# @return [Array(Integer, Integer)] the count and the byte offset where the
#   payload begins
# @raise [RuntimeError] if the continuation byte lies past the end of +data+
#   or is not an integer marker
def self.get_count(data, pos, low)
  return [low, pos + 1] if low != 0x0F

  marker_pos = pos + 1
  raise "Position #{marker_pos} beyond data size" if marker_pos >= data.bytesize

  # getbyte addresses bytes regardless of the string's encoding, whereas
  # data[i].ord addresses characters and breaks on multi-byte strings.
  int_marker = data.getbyte(marker_pos)
  raise "Invalid count marker" unless int_marker >> 4 == 0x1

  byte_count = 1 << (int_marker & 0x0F)
  count = read_int(data, pos + 2, byte_count)
  [count, pos + 2 + byte_count]
end
.parse(data) ⇒ Object
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/imsg-grep/apple/bplist.rb', line 42

# Parse a binary property list (magic "bplist00") into plain Ruby objects.
#
# Objects are decoded recursively starting from the root index named in the
# trailer. Decoded values are: nil / true / false, Integer, Float, Time,
# String (UTF-8 text, or BINARY for data and for undecodable strings),
# { "CF$UID" => Integer } for UIDs, Array, Set and Hash.
#
# A reference back to an object that is still being parsed (a cycle) resolves
# to the +:parsing+ placeholder symbol rather than raising.
#
# @param data [String] the complete bplist bytes; the argument is not mutated
# @return [Object] the decoded root object
# @raise [RuntimeError] on a bad header, a truncated or inconsistent trailer,
#   an out-of-range offset/reference, or an unknown marker
def self.parse(data)
  data = data.dup.force_encoding("BINARY")
  raise "Invalid header" unless data.start_with?("bplist00")

  # The input must hold at least the 8-byte header and the 32-byte trailer;
  # otherwise the trailer offsets below go negative and index from the end of
  # the string (or hit nil).
  raise "Data too short for trailer" if data.bytesize < 8 + 32

  # Parse trailer (last 32 bytes)
  trailer_start     = data.bytesize - 32
  offset_int_size   = data.getbyte(trailer_start + 6)
  objref_size       = data.getbyte(trailer_start + 7)
  num_objects       = data.unpack1("Q>", offset: trailer_start + 8)
  root_object_index = data.unpack1("Q>", offset: trailer_start + 16)
  offset_table_pos  = data.unpack1("Q>", offset: trailer_start + 24)

  raise "Invalid trailer" if offset_int_size < 1 || objref_size < 1
  raise "Invalid object count" if num_objects < 1 || root_object_index >= num_objects

  # num_objects comes straight from the (untrusted) trailer and sizes two
  # arrays below. Make sure the whole offset table fits inside the data
  # before allocating anything proportional to it.
  if offset_table_pos + num_objects * offset_int_size > data.bytesize
    raise "Invalid offset table"
  end

  # Read offset table
  offsets = Array.new(num_objects) do |i|
    read_int(data, offset_table_pos + i * offset_int_size, offset_int_size)
  end

  # Parse objects recursively
  objects = Array.new(num_objects)
  object_cache = {}

  parse_object = lambda do |index|
    raise "Invalid object ref: #{index}" if index >= num_objects
    # Truthy entries are either finished objects or the :parsing placeholder.
    return objects[index] if objects[index]

    # Falsy results (nil / false) are not caught above, so also look the
    # object up by its offset.
    offset = offsets[index]
    return object_cache[offset] if object_cache.key?(offset)

    # Set placeholder so a circular reference terminates instead of recursing
    objects[index] = :parsing

    pos = offset
    raise "Position #{pos} beyond data size #{data.bytesize}" if pos >= data.bytesize

    marker = data.getbyte(pos)
    high = marker >> 4
    low = marker & 0x0F

    result =
      case high
      when 0x0 # Null, Bool, Fill
        case marker
        when 0x00 then nil
        when 0x08 then false
        when 0x09 then true
        else raise "Unknown null type: 0x#{marker.to_s(16)}"
        end

      when 0x1 # Integer
        byte_count = 1 << low
        raise "Invalid int size" if byte_count > 16
        raise "Position #{pos + 1} + #{byte_count} beyond data size" if pos + 1 + byte_count > data.bytesize

        if byte_count == 16
          # 128-bit integer - read as two 64-bit halves
          upper = data[pos + 1, 8].unpack1("Q>")
          lower = data[pos + 9, 8].unpack1("Q>")
          # Convert to signed if the most significant bit is set
          upper -= (1 << 64) if upper >= (1 << 63)
          # Ruby handles big integers automatically
          (upper << 64) | lower
        else
          value = read_int(data, pos + 1, byte_count)
          # Per Apple spec: only 8+ byte integers are signed, 1/2/4 byte are unsigned
          if byte_count >= 8 && value >= (1 << (byte_count * 8 - 1))
            value - (1 << (byte_count * 8))
          else
            value
          end
        end

      when 0x2 # Real
        byte_count = 1 << low
        raise "Position #{pos + 1} + #{byte_count} beyond data size" if pos + 1 + byte_count > data.bytesize

        case byte_count
        when 4 then data[pos + 1, 4].unpack1("g")
        when 8 then data[pos + 1, 8].unpack1("G")
        else raise "Invalid real size: #{byte_count}"
        end

      when 0x3 # Date
        raise "Invalid date marker" unless marker == 0x33
        raise "Position #{pos + 1} + 8 beyond data size" if pos + 1 + 8 > data.bytesize

        seconds = data[pos + 1, 8].unpack1("G")
        Time.at(APPLE_EPOCH + seconds)

      when 0x4 # Data
        count, start = get_count(data, pos, low)
        raise "Position #{start} + #{count} beyond data size" if start + count > data.bytesize

        data[start, count]

      when 0x5 # ASCII string
        count, start = get_count(data, pos, low)
        raise "Position #{start} + #{count} beyond data size" if start + count > data.bytesize

        ascii_data = data[start, count]
        # Validate ASCII - check for non-ASCII bytes
        ascii_data.force_encoding("US-ASCII")
        if ascii_data.valid_encoding?
          ascii_data.encode!("UTF-8")
        else
          # Invalid ASCII, keep as binary for later Base64 encoding
          ascii_data.force_encoding("BINARY")
        end

      when 0x6 # UTF-16 string (count is in 16-bit code units)
        count, start = get_count(data, pos, low)
        raise "Position #{start} + #{count * 2} beyond data size" if start + count * 2 > data.bytesize

        utf16_data = data[start, count * 2]
        # Convert UTF-16BE to UTF-8
        begin
          utf16_data.force_encoding("UTF-16BE").encode!("UTF-8")
        rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
          # Invalid UTF-16, keep as binary for later Base64 encoding
          utf16_data.force_encoding("BINARY")
        end

      when 0x8 # UID
        byte_count = low + 1
        raise "Position #{pos + 1} + #{byte_count} beyond data size" if pos + 1 + byte_count > data.bytesize

        { "CF$UID" => read_int(data, pos + 1, byte_count) }

      when 0xA # Array
        count, start = get_count(data, pos, low)
        raise "Position #{start} + #{count * objref_size} beyond data size" if start + count * objref_size > data.bytesize

        Array.new(count) do |i|
          parse_object.call(read_int(data, start + i * objref_size, objref_size))
        end

      when 0xC # Set
        count, start = get_count(data, pos, low)
        raise "Position #{start} + #{count * objref_size} beyond data size" if start + count * objref_size > data.bytesize

        members = Array.new(count) do |i|
          parse_object.call(read_int(data, start + i * objref_size, objref_size))
        end
        Set.new(members)

      when 0xD # Dict: all key refs come first, followed by all value refs
        count, start = get_count(data, pos, low)
        raise "Position #{start} + #{count * objref_size * 2} beyond data size" if start + count * objref_size * 2 > data.bytesize

        Array.new(count) do |i|
          [
            parse_object.call(read_int(data, start + i * objref_size, objref_size)),
            parse_object.call(read_int(data, start + (count + i) * objref_size, objref_size))
          ]
        end.to_h

      else
        raise "Unknown marker: 0x#{marker.to_s(16)}"
      end

    objects[index] = result
    # Cache the parsed object by its offset for reuse
    object_cache[offset] = result
  end

  parse_object.call(root_object_index)
end
.read_int(data, pos, size) ⇒ Object
Read big-endian integer from data at position
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/imsg-grep/apple/bplist.rb', line 11 def self.read_int(data, pos, size) raise "Position #{pos} + #{size} beyond data size" if pos + size > data.bytesize case size when 1 then data[pos].unpack1("C") when 2 then data[pos, 2].unpack1("n") when 4 then data[pos, 4].unpack1("N") when 8 then data[pos, 8].unpack1("Q>") else # Fallback for other sizes bytes = data[pos, size].unpack("C*") bytes.reduce(0) { |a, b| raise "nil value in read_int: a=#{a.inspect}, b=#{b.inspect}" if a.nil? || b.nil? (a << 8) | b } end end |