Module: BPList

Defined in:: lib/imsg-grep/apple/bplist.rb

Constant Summary collapse

APPLE_EPOCH = 978307200

Apple’s epoch offset from the Unix epoch, in seconds (Apple’s epoch is 2001-01-01 00:00:00 UTC)

Class Method Summary collapse

.get_count(data, pos, low) ⇒ Object

Get count/length (handles 0xF continuation).
.parse(data) ⇒ Object

Parse a binary property list (“bplist00”) and return its root object.
.read_int(data, pos, size) ⇒ Object

Read big-endian integer from data at position.

Class Method Details

.get_count(data, pos, low) ⇒ `Object`

Get count/length (handles 0xF continuation)

# File 'lib/imsg-grep/apple/bplist.rb', line 29

# Decode the count/length field of a variable-length bplist object.
#
# The low nibble of an object marker holds the count directly, unless it is
# 0xF. In that case the count is stored right after the marker as an integer
# object: a marker byte 0x1N followed by 2**N big-endian bytes.
#
# @param data [String] raw plist bytes
# @param pos [Integer] byte offset of the object marker
# @param low [Integer] low nibble of the marker found at +pos+
# @return [Array(Integer, Integer)] the count, and the byte offset of the
#   first payload byte following the count
# @raise [RuntimeError] if the continuation byte lies beyond the data or is
#   not an integer marker
def self.get_count(data, pos, low)
  return [low, pos + 1] if low != 0x0F

  raise "Position #{pos + 1} beyond data size" if pos + 1 >= data.bytesize

  # getbyte addresses bytes whatever the string's encoding is; data[i] would
  # index characters on a non-binary string and disagree with bytesize.
  int_marker = data.getbyte(pos + 1)
  raise "Invalid count marker" unless (int_marker >> 4) == 0x1

  byte_count = 1 << (int_marker & 0x0F)
  count = read_int(data, pos + 2, byte_count)
  [count, pos + 2 + byte_count]
end

.parse(data) ⇒ `Object`

# File 'lib/imsg-grep/apple/bplist.rb', line 42

# Parse a binary property list ("bplist00") and return its root object.
#
# Objects are decoded as follows:
# * null/bool      -> nil / false / true
# * integer        -> Integer (1/2/4-byte values unsigned, 8/16-byte signed)
# * real           -> Float
# * date           -> Time (seconds added to APPLE_EPOCH)
# * data           -> binary String
# * ASCII string   -> UTF-8 String, or binary String if not valid ASCII
# * UTF-16 string  -> UTF-8 String, or binary String if conversion fails
# * UID            -> { "CF$UID" => Integer }
# * array/set/dict -> Array / Set / Hash of decoded objects
#
# @param data [String] the complete plist bytes (not modified; a binary copy is used)
# @return [Object] the decoded root object
# @raise [RuntimeError] on a bad header, truncated input, inconsistent
#   trailer, out-of-range reference, or unknown marker
def self.parse(data)
  data = data.dup.force_encoding("BINARY")
  raise "Invalid header" unless data.start_with?("bplist00")

  # The 8-byte header plus the 32-byte trailer is the least a file can hold.
  # Without this check a short input yields a negative trailer_start and the
  # trailer fields would be read from the wrong place (or from nil).
  raise "Data too short for trailer" if data.bytesize < 8 + 32

  # Parse trailer (last 32 bytes)
  trailer_start     = data.bytesize - 32
  offset_int_size   = data[trailer_start + 6].ord
  objref_size       = data[trailer_start + 7].ord
  num_objects       = data.unpack1("Q>", offset: trailer_start + 8)
  root_object_index = data.unpack1("Q>", offset: trailer_start + 16)
  offset_table_pos  = data.unpack1("Q>", offset: trailer_start + 24)

  raise "Invalid trailer" if offset_int_size < 1 || objref_size < 1
  raise "Invalid object count" if num_objects < 1 || root_object_index >= num_objects

  # Reject an offset table that cannot fit in the data before sizing any
  # array from num_objects, so a forged count cannot trigger a huge allocation.
  raise "Invalid offset table" if offset_table_pos + num_objects * offset_int_size > data.bytesize

  # Read offset table
  offsets = Array.new(num_objects) do |i|
    pos = offset_table_pos + i * offset_int_size
    read_int(data, pos, offset_int_size)
  end

  # Parse objects recursively
  objects = Array.new(num_objects)
  object_cache = {}

  parse_object = lambda do |index|
    raise "Invalid object ref: #{index}" if index >= num_objects
    return objects[index] if objects[index]

    # Objects that share a file offset are decoded only once
    offset = offsets[index]
    return object_cache[offset] if object_cache.has_key?(offset)

    # Placeholder that stops infinite recursion: a container that refers back
    # to an object still being parsed receives :parsing in that position.
    objects[index] = :parsing

    pos    = offsets[index]
    raise "Position #{pos} beyond data size #{data.bytesize}" if pos >= data.bytesize
    marker = data[pos].ord
    high   = marker >> 4
    low    = marker & 0x0F

    result = case high
    when 0x0  # Null, Bool, Fill
      case marker
      when 0x00 then nil
      when 0x08 then false
      when 0x09 then true
      else raise "Unknown null type: 0x#{marker.to_s(16)}"
      end

    when 0x1  # Integer
      byte_count = 1 << low
      raise "Invalid int size" if byte_count > 16
      raise "Position #{pos + 1} + #{byte_count} beyond data size" if pos + 1 + byte_count > data.bytesize

      if byte_count == 16
        # 128-bit integer - read as two 64-bit halves
        hi64 = data[pos + 1, 8].unpack1("Q>")
        lo64 = data[pos + 9, 8].unpack1("Q>")
        # Convert to signed if the most significant bit is set
        hi64 -= (1 << 64) if hi64 >= (1 << 63)
        # Ruby handles big integers automatically
        (hi64 << 64) | lo64
      else
        value = read_int(data, pos + 1, byte_count)
        # Per Apple spec: only 8+ byte integers are signed, 1/2/4 byte are unsigned
        if byte_count >= 8 && value >= (1 << (byte_count * 8 - 1))
          value - (1 << (byte_count * 8))
        else
          value
        end
      end

    when 0x2  # Real
      byte_count = 1 << low
      raise "Position #{pos + 1} + #{byte_count} beyond data size" if pos + 1 + byte_count > data.bytesize
      case byte_count
      when 4 then data[pos + 1, 4].unpack1("g")
      when 8 then data[pos + 1, 8].unpack1("G")
      else raise "Invalid real size: #{byte_count}"
      end

    when 0x3  # Date
      raise "Invalid date marker" unless marker == 0x33
      raise "Position #{pos + 1} + 8 beyond data size" if pos + 1 + 8 > data.bytesize
      # Stored as a big-endian double counting seconds from Apple's epoch
      seconds = data[pos + 1, 8].unpack1("G")
      Time.at(APPLE_EPOCH + seconds)

    when 0x4  # Data
      count, start = get_count(data, pos, low)
      raise "Position #{start} + #{count} beyond data size" if start + count > data.bytesize
      data[start, count]

    when 0x5  # ASCII string
      count, start = get_count(data, pos, low)
      raise "Position #{start} + #{count} beyond data size" if start + count > data.bytesize
      ascii_data = data[start, count]
      # Validate ASCII - check for non-ASCII bytes
      ascii_data.force_encoding("US-ASCII")
      if ascii_data.valid_encoding?
        ascii_data.encode!("UTF-8")
      else
        # Invalid ASCII, keep as binary for later Base64 encoding
        ascii_data.force_encoding("BINARY")
      end

    when 0x6  # UTF-16 string (count is in 2-byte code units)
      count, start = get_count(data, pos, low)
      raise "Position #{start} + #{count * 2} beyond data size" if start + count * 2 > data.bytesize
      utf16_data = data[start, count * 2]
      # Convert UTF-16BE to UTF-8
      begin
        utf16_data.force_encoding("UTF-16BE").encode!("UTF-8")
      rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
        # Invalid UTF-16, keep as binary for later Base64 encoding
        utf16_data.force_encoding("BINARY")
      end

    when 0x8  # UID (low nibble is byte length minus one)
      byte_count = low + 1
      raise "Position #{pos + 1} + #{byte_count} beyond data size" if pos + 1 + byte_count > data.bytesize
      { "CF$UID" => read_int(data, pos + 1, byte_count)}

    when 0xA  # Array
      count, start = get_count(data, pos, low)
      raise "Position #{start} + #{count * objref_size} beyond data size" if start + count * objref_size > data.bytesize
      Array.new(count) { |i| parse_object.call(read_int(data, start + i * objref_size, objref_size)) }

    when 0xC  # Set
      count, start = get_count(data, pos, low)
      raise "Position #{start} + #{count * objref_size} beyond data size" if start + count * objref_size > data.bytesize
      Set.new(Array.new(count) { |i| parse_object.call(read_int(data, start + i * objref_size, objref_size)) })

    when 0xD  # Dict: all key refs come first, followed by all value refs
      count, start = get_count(data, pos, low)
      raise "Position #{start} + #{count * objref_size * 2} beyond data size" if start + count * objref_size * 2 > data.bytesize
      Array.new(count) { |i|
        [ parse_object.call(read_int(data, start + i * objref_size, objref_size)),
          parse_object.call(read_int(data, start + (count + i) * objref_size, objref_size))]
      }.to_h

    else
      raise "Unknown marker: 0x#{marker.to_s(16)}"
    end

    objects[index] = result
    # Cache the parsed object by its offset for reuse; this assignment is also
    # the lambda's return value.
    object_cache[offset] = result
  end

  parse_object.call(root_object_index)
end

.read_int(data, pos, size) ⇒ `Object`

Read big-endian integer from data at position

# File 'lib/imsg-grep/apple/bplist.rb', line 11

# Read an unsigned big-endian integer of +size+ bytes starting at byte
# offset +pos+.
#
# Offsets are byte offsets whatever the encoding of +data+ is, which matches
# the bytesize-based bounds check.
#
# @param data [String] raw bytes to read from
# @param pos [Integer] byte offset of the first (most significant) byte
# @param size [Integer] number of bytes to read; 0 yields 0
# @return [Integer] the decoded non-negative value
# @raise [RuntimeError] if +pos+ is negative or the span runs past the data
def self.read_int(data, pos, size)
  raise "Position #{pos} + #{size} beyond data size" if pos.negative? || pos + size > data.bytesize

  bytes = data.byteslice(pos, size)
  case size
  when 1 then bytes.getbyte(0)
  when 2 then bytes.unpack1("n")
  when 4 then bytes.unpack1("N")
  when 8 then bytes.unpack1("Q>")
  else
    # Any other width (e.g. 3-byte offsets): fold the bytes manually
    bytes.each_byte.reduce(0) { |acc, byte| (acc << 8) | byte }
  end
end

Module: BPList

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.get_count(data, pos, low) ⇒ Object

.parse(data) ⇒ Object

.read_int(data, pos, size) ⇒ Object

.get_count(data, pos, low) ⇒ `Object`

.parse(data) ⇒ `Object`

.read_int(data, pos, size) ⇒ `Object`