Class: LineIterator

Inherits:
SimpleDelegator
  • Object
show all
Includes:
Enumerable
Defined in:
lib/line_iterator.rb,
lib/line_iterator/version.rb

Constant Summary collapse

BUFFER_SIZE =
100
VERSION =
"0.1.0"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(input, opts = {}) ⇒ LineIterator

Open up the input. If it is an IO object, use it directly; otherwise assume it is a filename and open it. The stream is also run through Zlib::GzipReader if the filename ends in .gz or if :gzip => true was passed in opts.



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/line_iterator.rb', line 22

def initialize(input, opts={})
  if input.is_a? IO
    @f = input
    # deal with IO object
  else # assume string
    @f = File.open(input)
    opts[:gzip] ||= (input =~ /\.gz\Z/)
  end
  
  if opts[:gzip]
    @f =  Zlib::GzipReader.new(@f)
  end
  @base_iterator = @f.each_with_index
  @last_line_number = 0
  @last_record_number = 0
  @done = false
  @buffer = []
  @backup_buffer = []
  @end_of_record_pattern = /\A\s*\n/
end

Instance Attribute Details

#doneObject (readonly)

Returns the value of attribute done.



9
10
11
# File 'lib/line_iterator.rb', line 9

# Reader for @done, the flag the iteration methods set once the underlying
# input has raised StopIteration.
def done; @done; end

#end_of_record_patternObject

Returns the value of attribute end_of_record_pattern.



10
11
12
# File 'lib/line_iterator.rb', line 10

# Reader for @end_of_record_pattern, the pattern matched against the upcoming
# raw line to decide whether it separates two records.
def end_of_record_pattern; @end_of_record_pattern; end

#last_line_numberObject (readonly)

Returns the value of attribute last_line_number.



9
10
11
# File 'lib/line_iterator.rb', line 9

# Reader for @last_line_number: the 1-based number of the line most recently
# returned by #next (0 before anything has been read).
def last_line_number; @last_line_number; end

#last_record_numberObject (readonly)

Returns the value of attribute last_record_number.



9
10
11
# File 'lib/line_iterator.rb', line 9

# Reader for @last_record_number: how many records #next_record has counted
# so far.
def last_record_number; @last_record_number; end

Instance Method Details

#eachObject Also known as: each_line

Override the normal #each to track last_line_number.



100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/line_iterator.rb', line 100

# Yield every remaining line (as returned by #next) to the block.
#
# Without a block, returns an Enumerator over the same sequence. With a
# block, runs until #next (or the block itself) raises StopIteration, then
# marks the iterator as done and returns true.
def each
  return enum_for(:each) unless block_given?

  # Kernel#loop finishes quietly when StopIteration is raised inside it
  loop { yield self.next }
  @done = true
end

#each_recordObject

iterate over records



171
172
173
174
175
176
177
178
179
180
181
# File 'lib/line_iterator.rb', line 171

# Yield each remaining record (an array of lines from #next_record) to the
# block until the iterator reports it is done.
#
# Without a block, returns an Enumerator over the same sequence.
#
# #next_record hands back an empty array when it reaches the end of the input
# without collecting any line (empty input, or nothing left after the final
# separator). That is not a record, so it is not yielded.
def each_record
  return enum_for(:each_record) unless block_given?

  until done
    record = next_record
    yield record unless record.empty?
  end
end

#each_with_line_numberObject

Like #each_with_index, but tracks line numbers. This allows you to call next/skip and still get the correct line number out.



118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/line_iterator.rb', line 118

# Yield [line, line_number] pairs for every remaining line.
#
# The number is read from #last_line_number right after #next returns, so
# it stays correct even when the caller mixes in next/skip calls.
# Without a block, returns an Enumerator. With a block, marks the iterator
# as done once StopIteration is raised and returns true.
def each_with_line_number
  return enum_for(:each_with_line_number) unless block_given?

  loop do
    line = self.next # must run first: it updates the line counter
    yield [line, last_line_number]
  end
  @done = true
end

#end_of_record(buff) ⇒ Object

This default implementation just checks for blank lines and eats them, but you can override this in a subclass (perhaps using the contents of the buffer to determine EOR status)



140
141
142
143
144
145
146
147
148
# File 'lib/line_iterator.rb', line 140

# Decide whether the upcoming line ends the current record.
#
# The default looks at the next raw line without consuming it; when it
# matches end_of_record_pattern the line is consumed and true is returned,
# otherwise nothing is consumed and false is returned.
#
# buff - the lines collected so far for the current record. Unused here;
#        available to overriding implementations.
def end_of_record(buff)
  upcoming = peek
  return false unless end_of_record_pattern.match(upcoming[0])

  self.next # swallow the separator line
  true
end

#nextObject Also known as: next_line

Override the normal enumerable #next to keep internal track of line numbers



47
48
49
50
51
52
53
54
55
56
57
# File 'lib/line_iterator.rb', line 47

# Return the next line with its line terminator removed, updating
# @last_line_number along the way.
#
# Lines pushed back by a backwards skip are replayed before the underlying
# iterator is consulted. StopIteration from the underlying iterator
# propagates to the caller.
def next
  entry =
    if @backup_buffer.empty?
      @base_iterator.next
    else
      @backup_buffer.shift
    end

  # Remember the entry for skipping backwards, dropping the oldest one once
  # the buffer is full
  @buffer.shift if @buffer.size == BUFFER_SIZE
  @buffer << entry

  text, index = entry
  @last_line_number = index + 1 # indices are zero-based, line numbers are not
  text.chomp
end

#next_recordObject

Get the next record and return it (as an array of chomp’ed lines).

Raises:

  • (StopIteration)


151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/line_iterator.rb', line 151

# Collect and return the next record as an array of chomped lines.
#
# Lines are gathered until #end_of_record reports a boundary after at least
# one line has been collected, or until the input is exhausted. Reaching the
# end of the input marks the iterator as done; if nothing was collected an
# empty array is returned and the record counter is left alone.
#
# Raises StopIteration when the iterator is already done.
def next_record
  raise StopIteration if self.done

  buff = []
  begin
    while true
      line_number_before = @last_line_number
      if end_of_record(buff)
        unless buff.empty?
          @last_record_number += 1
          return buff
        end
        # Boundary seen before any content. If the check consumed a line
        # (the default eats the separator) look again, so a run of
        # separators never leaks into the record. If nothing was consumed,
        # fall through and take the line anyway so progress is guaranteed.
        next if @last_line_number != line_number_before
      end
      buff << self.next
    end
  rescue StopIteration
    @done = true
    # Only a non-empty buffer is a real record worth counting
    @last_record_number += 1 unless buff.empty?
    buff
  end
end

#peekObject

Provide a #peek that takes into account the backup buffer



63
64
65
# File 'lib/line_iterator.rb', line 63

# Look at the entry #next would consume, without consuming it.
#
# Entries pushed back by a backwards skip take precedence over the
# underlying iterator. The result is the raw entry (not a chomped line).
def peek
  return @base_iterator.peek if @backup_buffer.empty?

  @backup_buffer[0]
end

#skip(n = 1) ⇒ Object

Skip n lines (default: 1); a negative n skips backwards through the buffered lines. Skipping forward just calls next repeatedly, but will never raise a StopIteration error.



70
71
72
73
74
75
76
77
78
79
80
# File 'lib/line_iterator.rb', line 70

# Skip n lines (default 1).
#
# Zero is a no-op returning nil; a positive n is handed to #skip_forward and
# a negative n to #skip_backwards (with the sign removed), whose result is
# returned.
def skip(n = 1)
  return if n == 0
  return skip_forward(n) if n > 0
  return skip_backwards(-n) if n < 0

  # Only reached for a value that is neither equal to, above, nor below
  # zero (e.g. Float::NAN)
  raise "Tried to skip backwards more than size of buffer (#{BUFFER_SIZE})"
end

#skip_backwards(n) ⇒ Object

Raises:

  • (IndexError)


92
93
94
95
96
97
# File 'lib/line_iterator.rb', line 92

# Move n lines back by transferring the most recently read entries from the
# history buffer to the front of the backup buffer, so #next replays them.
#
# n - number of lines to go back. Zero or less is a no-op.
#
# Returns the updated last line number.
# Raises IndexError when n exceeds the number of buffered lines.
def skip_backwards(n)
  # Nothing to rewind; also avoids indexing into an empty backup buffer below
  return @last_line_number if n <= 0
  raise IndexError, "Tried to skip backwards too far" if n > @buffer.size

  n.times { @backup_buffer.unshift(@buffer.pop) }
  # The zero-based index of the first replayable entry equals the 1-based
  # number of the line that precedes it
  @last_line_number = @backup_buffer[0][1]
end

#skip_forward(n) ⇒ Object



82
83
84
85
86
87
88
89
90
# File 'lib/line_iterator.rb', line 82

# Consume up to n lines by calling #next repeatedly.
#
# Running out of input is not an error: the StopIteration is absorbed and
# the iterator is marked as done instead.
def skip_forward(n)
  n.times { self.next }
rescue StopIteration
  @done = true
end

#skip_record(n = 1) ⇒ Object Also known as: skip_records

Skip over records (forward only!!!!)



184
185
186
187
188
189
190
# File 'lib/line_iterator.rb', line 184

# Read and discard up to n records (default 1) via #next_record.
#
# A StopIteration raised along the way is absorbed and the iterator is
# marked as done.
def skip_record(n = 1)
  n.times do
    self.next_record
  end
rescue StopIteration
  @done = true
end

#to_sObject Also known as: inspect



12
13
14
# File 'lib/line_iterator.rb', line 12

# Short human-readable description: the class name, the underlying input
# object, and the number of the last line read.
def to_s
  owner = self.class
  line_no = last_line_number
  "#{owner} <#{@f}, last_line_number: #{line_no}>"
end