Class: ApkXml

Inherits:
Object
  • Object
show all
Defined in:
lib/apktools/apkxml.rb

Overview

Class to parse an APK’s binary XML format back into textual XML

Defined Under Namespace

Classes: ChunkHeader, ResTypeEntry, StringPool, XmlAttribute, XmlElement, XmlResourceMap, XmlTreeHeader

Constant Summary collapse

DEBUG =

:nodoc:

false

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(apk_file) ⇒ ApkXml

Create a new ApkXml instance from the specified apk_file

This opens and parses the contents of the APK’s resources.arsc file.



115
116
117
118
119
# File 'lib/apktools/apkxml.rb', line 115

# Build a parser bound to +apk_file+.
#
# apk_file:: kept as the APK that later parse calls search, and also handed
#            to ApkResources.new to create the resource resolver.
def initialize(apk_file)
  # Global Zip setting; turned off before any archive access happens below.
  Zip.warn_invalid_date = false

  @current_apk   = apk_file
  @apk_resources = ApkResources.new(apk_file)
end

Instance Attribute Details

#apk_resources ⇒ Object (readonly)

ApkResources instance used to resolve resources in this APK



107
108
109
# File 'lib/apktools/apkxml.rb', line 107

# Reader for @apk_resources, which the constructor sets to the ApkResources
# instance built from the APK file.
def apk_resources
  @apk_resources
end

#current_apk ⇒ Object (readonly)

APK file where parser will search for XML



105
106
107
# File 'lib/apktools/apkxml.rb', line 105

# Reader for @current_apk, the APK file value passed to the constructor.
def current_apk
  @current_apk
end

#xml_elements ⇒ Object (readonly)

Array of XmlElements from the last parse operation



109
110
111
# File 'lib/apktools/apkxml.rb', line 109

# Reader for @xml_elements, the array of XmlElement objects collected by the
# most recent parse_xml call (nil until parse_xml has been called).
def xml_elements
  @xml_elements
end

Instance Method Details

#parse_xml(xml_file, pretty = false, resolve_resources = false) ⇒ Object

Read the requested XML file from inside the APK and parse out into readable textual XML. Returns a string of the parsed XML.

xml_file: Name of the XML file inside the APK (e.g. AndroidManifest.xml); it is matched against the names of the archive's entries.
pretty: Optionally format the XML output as human readable.
resolve_resources: Optionally, where possible, resolve resource references to their default value.

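This opens the APK, reads the entry whose name matches xml_file, and decodes its binary XML chunks. Resource references are looked up through the ApkResources instance created by the constructor.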



130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
# File 'lib/apktools/apkxml.rb', line 130

# Read the requested XML file from inside the APK and convert its binary XML
# chunks into textual XML.
#
# xml_file::          Name of the XML entry to read. It is passed to
#                     String#match against every archive entry name, so it
#                     behaves as a pattern; when several entries match, the
#                     last one visited is used.
# pretty::            When true, emit newlines and two-space indentation.
# resolve_resources:: When true, replace resource references with their
#                     default value wherever one can be found.
#
# Returns the generated XML as a String. The XmlElement objects created
# along the way are stored in @xml_elements.
#
# Raises RuntimeError when no archive entry matches +xml_file+.
def parse_xml(xml_file, pretty = false, resolve_resources = false)
  # Reset state left over from a previous parse
  @xml_elements = []
  xml_output = ''.dup # unfrozen buffer so it can be appended to in place
  indent = 0
  data = nil

  Zip.warn_invalid_date = false
  Zip::File.foreach(@current_apk) do |f|
    data = f.get_input_stream.read.force_encoding('BINARY') if f.name.match(xml_file)
  end

  # Fail with a clear message instead of an unrelated error on nil further down.
  raise "No entry matching #{xml_file.inspect} found in #{@current_apk}" if data.nil?

  # Parse the Header Chunk
  header = ChunkHeader.new(read_short(data, HEADER_START),
                           read_short(data, HEADER_START + 2),
                           read_word(data, HEADER_START + 4))

  # Parse the StringPool Chunk
  startoffset_pool = HEADER_START + header.size
  puts "Parse Main StringPool Chunk" if DEBUG
  stringpool_main = parse_stringpool(data, startoffset_pool)
  puts "#{stringpool_main.values.length} strings found" if DEBUG

  # Namespace URI => prefix, filled in as START_NAMESPACE chunks are seen
  namespaces = {}

  # Element namespaces are stored as the namespace URI (the attribute handling
  # below treats the same field that way), so translate to the declared prefix
  # for output. Falls back to the raw value when no prefix is registered.
  qualified_name = lambda do |ns, local_name|
    if ns.nil?
      local_name
    else
      "#{namespaces[ns] || ns}:#{local_name}"
    end
  end

  # Parse the remainder of the file chunks based on type
  current = startoffset_pool + stringpool_main.header.chunk_size
  puts "Parse Remaining Chunks" if DEBUG
  while current < data.length
    ## Parse Header
    header = ChunkHeader.new(read_short(data, current),
                             read_short(data, current + 2),
                             read_word(data, current + 4))
    body_start = current + header.size

    ## Check Type
    case header.type
    when TYPE_XML_RESOURCEMAP
      # Maps resource ids to strings in the pool. Nothing in this method
      # consumes that mapping, so the chunk is simply skipped.
    when TYPE_XML_STARTNAMESPACE
      tree_header = parse_tree_header(header, data, current)
      prefix = stringpool_main.values[read_word(data, body_start)]
      uri = stringpool_main.values[read_word(data, body_start + 4)]
      namespaces[uri] = prefix
      puts "NAMESPACE_START: xmlns:#{prefix} = '#{uri}'" if DEBUG
    when TYPE_XML_ENDNAMESPACE
      tree_header = parse_tree_header(header, data, current)
      prefix = stringpool_main.values[read_word(data, body_start)]
      uri = stringpool_main.values[read_word(data, body_start + 4)]
      puts "NAMESPACE_END: xmlns:#{prefix} = '#{uri}'" if DEBUG
    when TYPE_XML_STARTELEMENT
      tree_header = parse_tree_header(header, data, current)

      # Parse the element data
      ns_index = read_word(data, body_start)
      namespace = ns_index == OFFSET_NO_ENTRY ? nil : stringpool_main.values[ns_index]
      name = stringpool_main.values[read_word(data, body_start + 4)]

      attribute_offset = read_short(data, body_start + 8)
      attribute_size = read_short(data, body_start + 10)
      attribute_count = read_short(data, body_start + 12)
      id_idx = read_short(data, body_start + 14)
      class_idx = read_short(data, body_start + 16)
      style_idx = read_short(data, body_start + 18)

      # Parse each fixed-size attribute record
      attributes = (0...attribute_count).map do |i|
        attr_start = body_start + attribute_offset + i * attribute_size

        uri_index = read_word(data, attr_start)
        attr_namespace = uri_index == OFFSET_NO_ENTRY ? nil : namespaces[stringpool_main.values[uri_index]]
        attr_name = stringpool_main.values[read_word(data, attr_start + 4)]

        raw_index = read_word(data, attr_start + 8)
        attr_raw = raw_index == OFFSET_NO_ENTRY ? nil : stringpool_main.values[raw_index]
        entry = ResTypeEntry.new(0, nil, read_byte(data, attr_start + 15), read_word(data, attr_start + 16))

        attr_value =
          if !attr_raw.nil?
            # Attribute has a raw value, use it
            attr_raw
          elsif entry.data_type == 1
            # Value is a reference to a resource; only look up the default
            # value when the caller asked for resolution.
            default_res = resolve_resources ? apk_resources.get_default_resource_value(entry.data) : nil
            if !default_res.nil?
              default_res.data
            else
              key_value = apk_resources.get_resource_key(entry.data, true)
              # No key found: use the raw id marked as a resource
              key_value.nil? ? "res:0x#{entry.data.to_s(16)}" : key_value
            end
          else
            # Value is a constant
            "0x#{entry.data.to_s(16)}"
          end

        XmlAttribute.new(attr_namespace, attr_name, attr_value)
      end

      # The first element emitted (output still empty) is flagged as the root
      element = XmlElement.new(tree_header, namespace, name, id_idx, class_idx, style_idx, attributes, xml_output == "")

      # Print the element/attribute data
      puts "ELEMENT_START: #{element.namespace} #{element.name}" if DEBUG

      if pretty
        xml_output << "\n" << ("  " * indent)
        indent += 1
      end
      xml_output << "<#{qualified_name.call(element.namespace, element.name)} "

      # Only print namespaces on the root element
      if element.is_root
        uris = namespaces.keys
        uris.each do |key|
          xml_output << "xmlns:#{namespaces[key]}=\"#{key}\" "
          xml_output << "\n" << ("  " * indent) if pretty && key != uris.last
        end
      end

      element.attributes.each do |attr|
        puts "---ATTRIBUTE: #{attr.namespace} #{attr.name} #{attr.value}" if DEBUG
        attr_display = attr.namespace.nil? ? attr.name : "#{attr.namespace}:#{attr.name}"
        xml_output << "\n" << ("  " * indent) if pretty
        xml_output << "#{attr_display}=\"#{attr.value}\" "
      end

      xml_output << ">"

      # Push every new element onto the array
      @xml_elements << element
    when TYPE_XML_ENDELEMENT
      tree_header = parse_tree_header(header, data, current)
      ns_index = read_word(data, body_start)
      namespace = ns_index == OFFSET_NO_ENTRY ? nil : stringpool_main.values[ns_index]
      name = stringpool_main.values[read_word(data, body_start + 4)]

      puts "ELEMENT END: #{namespace} #{name}" if DEBUG
      if pretty
        indent = [indent - 1, 0].max
        xml_output << "\n" << ("  " * indent)
      end
      xml_output << "</#{qualified_name.call(namespace, name)}>"
    when TYPE_XML_CDATA
      tree_header = parse_tree_header(header, data, current)

      cdata = stringpool_main.values[read_word(data, body_start)]
      # The typed value's data type is the single byte just before its data
      # word, matching how attribute values are read above.
      cdata_type = read_byte(data, body_start + 7)
      cdata_value = read_word(data, body_start + 8)
      puts "CDATA: #{cdata} #{cdata_type} #{cdata_value}" if DEBUG

      cdata.split(/\r?\n/).each do |item|
        xml_output << "\n" << ("  " * indent) if pretty
        xml_output << "<![CDATA[#{item.strip}]]>"
      end
    else
      puts "Unknown Chunk Found: #{header.type} #{header.size}" if DEBUG
      ## End Immediately
      break
    end

    # A chunk reporting a non-positive size would never advance the cursor
    # and would loop forever on a malformed file; stop parsing instead.
    break unless header.chunk_size > 0

    current += header.chunk_size
  end

  xml_output
end