Module: Splitter

Defined in:
lib/splitter.rb,
lib/splitter/version.rb

Constant Summary collapse

DEFAULT_BATCH_SIZE =
100
VERSION =
"0.0.2"

Class Method Summary collapse

Class Method Details

.split(filename, options) {|s| ... } ⇒ Object

Yields:

  • (s)


7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/splitter.rb', line 7

# Splits the file at +filename+ into batches of records and yields each batch
# as a String.
#
# The file is read in 1024-byte chunks and scanned for the closing tag of a
# record (the "splitter"). After every +batch_size+ records the accumulated
# text is terminated with +end_wrapper+ and yielded; the next batch is then
# started with +start_wrapper+. Whatever remains once the file is exhausted is
# yielded as a last batch, unless it is empty or contains nothing but the
# start wrapper directly followed by the end wrapper.
#
# @param filename [String] path of the file to read
# @param options [Hash] splitting options
# @option options [String] :start_wrapper text prepended to every batch after the first (required)
# @option options [String] :end_wrapper text appended to every full batch (required)
# @option options [String] :splitter record tag; a bare name such as "item" is
#   turned into "</item>", a value already shaped like a closing tag is used as is (required)
# @option options [Integer] :batch_size records per batch (defaults to DEFAULT_BATCH_SIZE)
# @yield [s] each batch; every batch is a distinct String object
# @raise [ArgumentError] if :start_wrapper, :end_wrapper or :splitter is missing
def split(filename, options)
  start_wrapper = options[:start_wrapper] || (raise ArgumentError, "A starting wrapper tag (:start_wrapper) must be provided.")
  end_wrapper = options[:end_wrapper] || (raise ArgumentError, "An ending wrapper tag (:end_wrapper) must be provided.")
  splitter = options[:splitter] || (raise ArgumentError, "A splitter tag (:splitter) must be provided.")
  splitter = "</#{splitter}>" unless splitter =~ /\<\/.+\>/
  batch_size = options[:batch_size] || DEFAULT_BATCH_SIZE

  # A splitter may be cut in two by a chunk boundary. Holding back this many
  # trailing bytes guarantees that a partially read splitter is re-scanned
  # together with the next chunk instead of being missed.
  overlap = splitter.size - 1

  s = "".dup
  count = 0
  File.open(filename) do |f|
    carry = String.new # unscanned tail carried over from the previous chunk
    while (chunk = f.read(1024))
      carry << chunk
      pos = 0
      while (i = carry.index(splitter, pos))
        count += 1
        s << carry[pos...i] << splitter

        if count % batch_size == 0
          s << end_wrapper
          yield s
          # Start the next batch in a fresh String so a batch the caller
          # kept a reference to is not modified afterwards.
          s = start_wrapper.dup
          count = 0
        end

        pos = i + splitter.size
      end

      # Everything before safe_end cannot be part of a splitter that is still
      # incomplete, so it can be moved to the current batch right away.
      safe_end = [carry.size - overlap, pos].max
      s << carry[pos...safe_end]
      carry = carry[safe_end..-1]
    end
    s << carry
  end

  # The wrappers are literal text, so escape them before building the pattern.
  yield s unless s.empty? || s =~ /#{Regexp.escape(start_wrapper)}\s*#{Regexp.escape(end_wrapper)}/
end