Module: BPList

Defined in:
lib/imsg-grep/apple/bplist.rb

Constant Summary collapse

APPLE_EPOCH =

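Seconds from the Unix epoch (1970-01-01 00:00:00 UTC) to Apple’s reference date (2001-01-01 00:00:00 UTC); added to plist date values to obtain Unix timestamps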

978307200

Class Method Summary collapse

Class Method Details

.get_count(data, pos, low) ⇒ Object

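Get an object’s count/length from the low nibble of its marker byte. A nibble of 0xF means the real count follows the marker as an integer object. Returns the count together with the byte offset at which the object’s payload starts.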



29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/imsg-grep/apple/bplist.rb', line 29

# Decode the count/length of a variable-size object (data, string, array,
# set, dict).
#
# Counts 0..14 live in the marker's low nibble. A low nibble of 0xF means the
# count is stored right after the marker as an integer object: one byte whose
# high nibble is 0x1 and whose low nibble is log2 of the byte width, followed
# by that many big-endian bytes.
#
# @param data [String] plist bytes
# @param pos [Integer] byte offset of the object's marker byte
# @param low [Integer] low nibble of that marker byte
# @return [Array(Integer, Integer)] the count, and the byte offset at which
#   the object's payload starts
# @raise [RuntimeError] if the extended count is truncated or is not
#   introduced by an integer marker
def self.get_count(data, pos, low)
  return [low, pos + 1] if low != 0x0F

  raise "Position #{pos + 1} beyond data size" if pos + 1 >= data.bytesize

  # getbyte addresses bytes, matching the bytesize check above; character
  # indexing would read the wrong position in a multibyte-encoded string.
  int_marker = data.getbyte(pos + 1)
  raise "Invalid count marker" unless int_marker >> 4 == 0x1

  byte_count = 1 << (int_marker & 0x0F)
  count = read_int(data, pos + 2, byte_count)
  [count, pos + 2 + byte_count]
end

.parse(data) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/imsg-grep/apple/bplist.rb', line 42

# Parse a binary property list ("bplist00") into plain Ruby objects.
#
# Type mapping, as implemented below:
#   null / bool    -> nil / false / true
#   int            -> Integer (1/2/4-byte unsigned; 8- and 16-byte two's complement)
#   real           -> Float
#   date           -> Time (seconds relative to APPLE_EPOCH)
#   data           -> binary String
#   ASCII / UTF-16 -> UTF-8 String (left as a binary String if the bytes are invalid)
#   UID            -> { "CF$UID" => Integer }
#   array/set/dict -> Array / Set / Hash
#
# A reference cycle is not an error: the reference that closes the cycle
# resolves to the symbol :parsing.
#
# @param data [String] raw plist bytes; the string is copied, not modified
# @return [Object] the decoded root object
# @raise [RuntimeError] on a bad header, truncated input or malformed contents
def self.parse(data)
  data = data.dup.force_encoding("BINARY")
  raise "Invalid header" unless data.start_with?("bplist00")

  # 8-byte header + 32-byte trailer is the smallest input whose trailer can be
  # read. Without this check trailer_start goes negative and the reads below
  # index from the end of the string (or off it) and fail with unrelated errors.
  raise "Data too short for bplist trailer: #{data.bytesize} bytes" if data.bytesize < 40

  # Parse trailer (last 32 bytes)
  trailer_start     = data.bytesize - 32
  offset_int_size   = data.getbyte(trailer_start + 6)
  objref_size       = data.getbyte(trailer_start + 7)
  num_objects       = data.unpack1("Q>", offset: trailer_start + 8)
  root_object_index = data.unpack1("Q>", offset: trailer_start + 16)
  offset_table_pos  = data.unpack1("Q>", offset: trailer_start + 24)

  raise "Invalid trailer" if offset_int_size < 1 || objref_size < 1
  raise "Invalid object count" if num_objects < 1 || root_object_index >= num_objects

  # num_objects is untrusted: make sure the whole offset table fits in the
  # input before allocating arrays sized by it.
  offset_table_end = offset_table_pos + num_objects * offset_int_size
  raise "Offset table end #{offset_table_end} beyond data size #{data.bytesize}" if offset_table_end > data.bytesize

  # Read offset table
  offsets = Array.new(num_objects) do |i|
    read_int(data, offset_table_pos + i * offset_int_size, offset_int_size)
  end

  # Parse objects recursively
  objects = Array.new(num_objects)
  object_cache = {}

  parse_object = lambda do |index|
    raise "Invalid object ref: #{index}" if index >= num_objects
    return objects[index] if objects[index]

    # Check cache first. has_key? (not truthiness) so that cached nil/false
    # objects are reused too.
    offset = offsets[index]
    return object_cache[offset] if object_cache.has_key?(offset)

    # Set placeholders to detect circular refs. The offset is marked as well
    # as the index: two different indices may share one offset, and a cycle
    # through such an alias would otherwise recurse until the stack overflows.
    objects[index] = :parsing
    object_cache[offset] = :parsing

    pos = offset
    raise "Position #{pos} beyond data size #{data.bytesize}" if pos >= data.bytesize
    marker = data.getbyte(pos)
    high   = marker >> 4
    low    = marker & 0x0F

    result = case high
    when 0x0  # Null, Bool, Fill
      case marker
      when 0x00 then nil
      when 0x08 then false
      when 0x09 then true
      else raise "Unknown null type: 0x#{marker.to_s(16)}"
      end

    when 0x1  # Integer
      byte_count = 1 << low
      raise "Invalid int size" if byte_count > 16
      raise "Position #{pos + 1} + #{byte_count} beyond data size" if pos + 1 + byte_count > data.bytesize

      if byte_count == 16
        # 128-bit integer - read as two 64-bit words
        upper = data[pos + 1, 8].unpack1("Q>")
        lower = data[pos + 9, 8].unpack1("Q>")
        # Two's complement: the sign lives in the MSB of the upper word
        upper -= (1 << 64) if upper >= (1 << 63)
        # Ruby handles big integers automatically
        (upper << 64) | lower
      else
        value = read_int(data, pos + 1, byte_count)
        # Per Apple spec: only 8+ byte integers are signed, 1/2/4 byte are unsigned
        if byte_count >= 8 && value >= (1 << (byte_count * 8 - 1))
          value - (1 << (byte_count * 8))
        else
          value
        end
      end

    when 0x2  # Real
      byte_count = 1 << low
      raise "Position #{pos + 1} + #{byte_count} beyond data size" if pos + 1 + byte_count > data.bytesize
      case byte_count
      when 4 then data[pos + 1, 4].unpack1("g")
      when 8 then data[pos + 1, 8].unpack1("G")
      else raise "Invalid real size: #{byte_count}"
      end

    when 0x3  # Date
      raise "Invalid date marker" unless marker == 0x33
      raise "Position #{pos + 1} + 8 beyond data size" if pos + 1 + 8 > data.bytesize
      # Big-endian double: seconds relative to Apple's reference date
      seconds = data[pos + 1, 8].unpack1("G")
      Time.at(APPLE_EPOCH + seconds)

    when 0x4  # Data
      count, start = get_count(data, pos, low)
      raise "Position #{start} + #{count} beyond data size" if start + count > data.bytesize
      data[start, count]

    when 0x5  # ASCII string
      count, start = get_count(data, pos, low)
      raise "Position #{start} + #{count} beyond data size" if start + count > data.bytesize
      ascii_data = data[start, count]
      # Validate ASCII - check for non-ASCII bytes
      ascii_data.force_encoding("US-ASCII")
      if ascii_data.valid_encoding?
        ascii_data.encode!("UTF-8")
      else
        # Invalid ASCII, keep as binary for later Base64 encoding
        ascii_data.force_encoding("BINARY")
      end

    when 0x6  # UTF-16 string
      # count is in 16-bit code units, hence the * 2 for bytes
      count, start = get_count(data, pos, low)
      raise "Position #{start} + #{count * 2} beyond data size" if start + count * 2 > data.bytesize
      utf16_data = data[start, count * 2]
      # Convert UTF-16BE to UTF-8
      begin
        utf16_data.force_encoding("UTF-16BE").encode!("UTF-8")
      rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
        # Invalid UTF-16, keep as binary for later Base64 encoding
        utf16_data.force_encoding("BINARY")
      end

    when 0x8  # UID
      byte_count = low + 1
      raise "Position #{pos + 1} + #{byte_count} beyond data size" if pos + 1 + byte_count > data.bytesize
      { "CF$UID" => read_int(data, pos + 1, byte_count) }

    when 0xA  # Array
      count, start = get_count(data, pos, low)
      raise "Position #{start} + #{count * objref_size} beyond data size" if start + count * objref_size > data.bytesize
      Array.new(count) { |i| parse_object.call(read_int(data, start + i * objref_size, objref_size)) }

    when 0xC  # Set
      count, start = get_count(data, pos, low)
      raise "Position #{start} + #{count * objref_size} beyond data size" if start + count * objref_size > data.bytesize
      Set.new(Array.new(count) { |i| parse_object.call(read_int(data, start + i * objref_size, objref_size)) })

    when 0xD  # Dict
      # Layout: count key refs followed by count value refs
      count, start = get_count(data, pos, low)
      raise "Position #{start} + #{count * objref_size * 2} beyond data size" if start + count * objref_size * 2 > data.bytesize
      Array.new(count) { |i|
        [ parse_object.call(read_int(data, start + i * objref_size, objref_size)),
          parse_object.call(read_int(data, start + (count + i) * objref_size, objref_size))]
      }.to_h

    else
      raise "Unknown marker: 0x#{marker.to_s(16)}"
    end

    objects[index] = result
    # Cache the parsed object by its offset for reuse (replaces the placeholder)
    object_cache[offset] = result
  end

  parse_object.call(root_object_index)
end

.read_int(data, pos, size) ⇒ Object

Read a big-endian unsigned integer of `size` bytes from `data`, starting at byte offset `pos`



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/imsg-grep/apple/bplist.rb', line 11

# Read a big-endian unsigned integer.
#
# Positions and sizes are in bytes; the string's encoding is irrelevant.
#
# @param data [String] source bytes
# @param pos [Integer] byte offset of the first (most significant) byte
# @param size [Integer] number of bytes to read; 0 yields 0
# @return [Integer] the decoded non-negative value
# @raise [RuntimeError] if pos or size is negative, or the range runs past
#   the end of data
def self.read_int(data, pos, size)
  # Negative offsets would silently address bytes from the end of the string.
  raise "Invalid position #{pos} or size #{size}" if pos.negative? || size.negative?
  raise "Position #{pos} + #{size} beyond data size" if pos + size > data.bytesize

  # getbyte/byteslice address bytes, in agreement with the bytesize check above.
  case size
  when 1 then data.getbyte(pos)
  when 2 then data.byteslice(pos, 2).unpack1("n")
  when 4 then data.byteslice(pos, 4).unpack1("N")
  when 8 then data.byteslice(pos, 8).unpack1("Q>")
  else
    # Fallback for other sizes: fold the bytes most-significant first
    data.byteslice(pos, size).each_byte.reduce(0) { |acc, byte| (acc << 8) | byte }
  end
end