Class: XmlSplit

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/xml_split.rb,
lib/xml_split/version.rb

Constant Summary collapse

# Executable names that .sgrep_bin looks for, in order of preference.
POSSIBLE_BIN_NAMES = %w[sgrep sgrep2]

# Markers wrapped around every region in sgrep's output format so that #each
# can locate region boundaries inside the raw output stream.
MAGIC_START = 'n8frNy6J'
MAGIC_STOP = 'H6py5pxG'

# Number of bytes requested per read from the sgrep pipe in #each.
CHUNK_SIZE = 65_536

# Library version string.
VERSION = "0.0.1"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path, element, options = {}) ⇒ XmlSplit

Returns a new instance of XmlSplit.



21
22
23
24
25
26
27
28
# File 'lib/xml_split.rb', line 21

# Builds a splitter for one XML file and one element name.
#
# @param path [String] location of the XML file; stored in absolute form
#   via File.expand_path
# @param element [Object] element identifier, stored unchanged
# @param options [Hash] optional settings
# @option options [Boolean] :caching (false) value exposed through #caching
def initialize(path, element, options = {})
  # Caller-supplied configuration.
  @path    = File.expand_path(path)
  @element = element
  @caching = options.fetch(:caching) { false }

  # Internal cache state: nothing stored yet, cache not complete.
  @nodes      = []
  @cache_full = false
end

Instance Attribute Details

#caching ⇒ Object (readonly)

Returns the value of attribute caching.



19
20
21
# File 'lib/xml_split.rb', line 19

# Reader for the caching setting.
#
# @return [Object] the value of the :caching option given to the constructor
#   (false when the option was omitted)
def caching
  @caching
end

#element ⇒ Object (readonly)

Returns the value of attribute element.



18
19
20
# File 'lib/xml_split.rb', line 18

# Reader for the element this splitter was created for.
#
# @return [Object] the element argument passed to the constructor, unchanged
def element
  @element
end

#path ⇒ Object (readonly)

Returns the value of attribute path.



17
18
19
# File 'lib/xml_split.rb', line 17

# Reader for the input file location.
#
# @return [String] the constructor's path argument after File.expand_path
def path
  @path
end

Class Method Details

.sgrep_bin ⇒ Object



5
6
7
# File 'lib/xml_split.rb', line 5

# Finds the name of an installed sgrep executable and memoizes it.
#
# Each name in POSSIBLE_BIN_NAMES is tried in order against the directories
# listed in ENV['PATH']. The lookup is done in Ruby rather than by shelling
# out to `which`, so it works where `which` is not installed, prints nothing
# to stderr, and spawns no subprocess.
#
# @return [String] the first name from POSSIBLE_BIN_NAMES found on PATH
#   (the bare name, not the full path)
# @raise [RuntimeError] if none of the names is an executable file on PATH
def sgrep_bin
  @sgrep_bin ||= begin
    search_dirs = ENV.fetch('PATH', '').split(File::PATH_SEPARATOR)
    found = POSSIBLE_BIN_NAMES.find do |bin|
      search_dirs.any? do |dir|
        # An empty PATH entry conventionally means the current directory.
        candidate = File.join(dir.empty? ? '.' : dir, bin)
        File.file?(candidate) && File.executable?(candidate)
      end
    end
    found || raise("Can't find any of #{POSSIBLE_BIN_NAMES.inspect} in your PATH")
  end
end

Instance Method Details

#each(&blk) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/xml_split.rb', line 30

# Yields every matching region of the file at #path, one String per region.
#
# On a complete pass the regions are extracted by running the sgrep binary
# (XmlSplit.sgrep_bin) with an output format that wraps each region (%r) in
# MAGIC_START / MAGIC_STOP, then scanning the output in CHUNK_SIZE reads.
# When #caching is truthy, the nodes of a completed pass are kept in @nodes
# and later calls replay them without running sgrep again.
#
# The cache is only replaced after a pass finishes, so aborting iteration
# early (break, Enumerable#first, an exception) never leaves a partial or
# duplicated cache behind.
#
# @yieldparam node [String] text of one matched region
# @return [Enumerator] when called without a block
# @return [Object] not meaningful when a block is given
def each(&blk)
  return enum_for(:each) unless block_given?
  return @nodes.each(&blk) if caching && @cache_full

  # Read the setting through the accessor once. (Do not write
  # `caching = caching`: that creates a local which is nil.)
  keep_nodes = caching
  collected = []
  leftover = ''

  # `start` and `stop` are helpers defined outside this excerpt; they supply
  # the two ends of the sgrep region expression.
  region_expression = %{"#{start}" .. "#{stop}"}
  output_format = "#{MAGIC_START}%r#{MAGIC_STOP}"

  IO.popen([ XmlSplit.sgrep_bin, '-n', '-o', output_format, region_expression, path ]) do |io|
    while (chunk = io.read(CHUNK_SIZE))
      # Prepend what the previous read left unfinished so regions (and
      # markers) that straddle a chunk boundary are reassembled.
      buffer = leftover + chunk
      while (open_at = buffer.index(MAGIC_START)) &&
            (close_at = buffer.index(MAGIC_STOP, open_at + MAGIC_START.length))
        # The trailing '>' is appended by hand - presumably the `stop`
        # pattern ends just before the closing bracket (verify against the
        # definition of `stop`).
        node = buffer[(open_at + MAGIC_START.length)...close_at] + '>'
        collected << node if keep_nodes
        yield node
        buffer = buffer[(close_at + MAGIC_STOP.length)..-1]
      end
      leftover = buffer
    end
  end

  # Only reached when the whole output was consumed.
  @nodes = collected if keep_nodes
  @cache_full = true
end