Class: TestTableExtractor

Inherits:
Minitest::Test
  • Object
show all
Defined in:
lib/table_extractor.rb

Constant Summary collapse

@@table_separator_regexp =

Regex pattern matching table separator rows: pipe-delimited cells, each a run of one or more hyphens with an optional colon on either side

/^[ \t]*\|? *(?::?-+:?) *( *\| *(?::?-+:?) *)*\|? *$/
@@multi_line_delimiter =
'|'
@@single_line_delimiter =
'!'

Instance Method Summary collapse

Instance Method Details

#test_inconsistent_columns ⇒ Object



193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# File 'lib/table_extractor.rb', line 193

# Two tables whose header/body cell counts disagree with their divider rows.
# The expected `columns` values (2, then 3) equal the number of dividers in
# each table's separator row rather than the number of header cells.
def test_inconsistent_columns
  first_table = [
    '| Species| Genus| Family',
    '|-|-',
    '| Pongo tapanuliensis| Pongo| Hominidae',
    '| | Histiophryne| Antennariidae'
  ]
  second_table = [
    '| Name| Species',
    '|-|-|-',
    '| Tapanuli Orangutan| Pongo tapanuliensis'
  ]
  # A single empty line sits between the two tables.
  markdown = [*first_table, '', *second_table]

  # Number of columns determined from row of dividers
  delimiter = @@multi_line_delimiter
  expected = [
    { column_offset: 1, columns: 2, delimiter: delimiter, rows: 4, start_index: 0 },
    { column_offset: 1, columns: 3, delimiter: delimiter, rows: 3, start_index: 5 }
  ]

  actual = TableExtractor.extract_tables(
    markdown,
    regexp: @@table_separator_regexp,
    multi_line_delimiter: @@multi_line_delimiter,
    single_line_delimiter: @@single_line_delimiter
  )

  assert_equal expected, actual
end

#test_indented_table ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/table_extractor.rb', line 136

# Every row of the input is prefixed with a tab followed by a space. The
# expectation is one table of 3 columns and 4 rows starting at index 0.
def test_indented_table
  indented_rows = [
    "\t | Species| Genus| Family",
    "\t |-|-|-",
    "\t | Pongo tapanuliensis| Pongo| Hominidae",
    "\t | | Histiophryne| Antennariidae"
  ]

  expected_table = {
    column_offset: 1,
    columns: 3,
    delimiter: @@multi_line_delimiter,
    rows: 4,
    start_index: 0
  }

  found = TableExtractor.extract_tables(
    indented_rows,
    regexp: @@table_separator_regexp,
    multi_line_delimiter: @@multi_line_delimiter,
    single_line_delimiter: @@single_line_delimiter
  )

  assert_equal [expected_table], found
end

#test_multiple_tables ⇒ Object



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/table_extractor.rb', line 154

# Two well-formed tables separated by an empty line. The first is expected
# at index 0 (3 columns, 4 rows), the second at index 5 (2 columns, 3 rows).
def test_multiple_tables
  species_table = [
    '| Species| Genus| Family',
    '|-|-|-',
    '| Pongo tapanuliensis| Pongo| Hominidae',
    '| | Histiophryne| Antennariidae'
  ]
  names_table = [
    '| Name| Species',
    '|-|-',
    '| Tapanuli Orangutan| Pongo tapanuliensis'
  ]
  markdown = [*species_table, '', *names_table]

  delimiter = @@multi_line_delimiter
  expected = [
    { column_offset: 1, columns: 3, delimiter: delimiter, rows: 4, start_index: 0 },
    { column_offset: 1, columns: 2, delimiter: delimiter, rows: 3, start_index: 5 }
  ]

  actual = TableExtractor.extract_tables(
    markdown,
    regexp: @@table_separator_regexp,
    multi_line_delimiter: @@multi_line_delimiter,
    single_line_delimiter: @@single_line_delimiter
  )

  assert_equal expected, actual
end

#test_no_tables ⇒ Object



180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/table_extractor.rb', line 180

# Input made only of plain sentences; the expected result is an empty list.
#
# NOTE(review): unlike the other tests in this file, only `regexp:` is passed
# here; `multi_line_delimiter:` and `single_line_delimiter:` are omitted.
# Presumably this relies on defaults in `extract_tables` -- confirm.
def test_no_tables
  plain_text = [
    'This is a regular line.',
    'Another regular line.'
  ]

  found = TableExtractor.extract_tables(
    plain_text,
    regexp: @@table_separator_regexp
  )

  assert_equal [], found
end

#test_single_table ⇒ Object



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/table_extractor.rb', line 118

# A lone table made of a header, a divider row and two body rows. The
# expectation is one entry: 3 columns, 4 rows, starting at index 0.
def test_single_table
  table_rows = [
    '| Species| Genus| Family',
    '|-|-|-',
    '| Pongo tapanuliensis| Pongo| Hominidae',
    '| | Histiophryne| Antennariidae'
  ]

  expected_table = {
    column_offset: 1,
    columns: 3,
    delimiter: @@multi_line_delimiter,
    rows: 4,
    start_index: 0
  }

  found = TableExtractor.extract_tables(
    table_rows,
    regexp: @@table_separator_regexp,
    multi_line_delimiter: @@multi_line_delimiter,
    single_line_delimiter: @@single_line_delimiter
  )

  assert_equal [expected_table], found
end

#test_table_at_end_of_lines ⇒ Object



220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# File 'lib/table_extractor.rb', line 220

# One line of ordinary text followed by a table that runs to the last input
# line. The expected table starts at index 1 and spans 4 rows of 3 columns.
def test_table_at_end_of_lines
  intro = ['Some introductory text.']
  table_rows = [
    '| Species| Genus| Family',
    '|-|-|-',
    '| Pongo tapanuliensis| Pongo| Hominidae',
    '| | Histiophryne| Antennariidae'
  ]
  markdown = intro + table_rows

  expected_table = {
    column_offset: 1,
    columns: 3,
    delimiter: @@multi_line_delimiter,
    rows: 4,
    start_index: 1
  }

  found = TableExtractor.extract_tables(
    markdown,
    regexp: @@table_separator_regexp,
    multi_line_delimiter: @@multi_line_delimiter,
    single_line_delimiter: @@single_line_delimiter
  )

  assert_equal [expected_table], found
end

#test_table_with_colon_hyphens ⇒ Object



262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
# File 'lib/table_extractor.rb', line 262

# The divider row uses colon-decorated hyphens (`:-:` and `:-`). The
# expectation is a single 3-column, 4-row table starting at index 0.
def test_table_with_colon_hyphens
  table_rows = [
    '| Name| Age| City',
    '|:-:|:-|:-:',
    '| John Doe| 30| New York',
    '| Jane Doe| 25| Los Angeles'
  ]

  expected_table = {
    column_offset: 1,
    columns: 3,
    delimiter: @@multi_line_delimiter,
    rows: 4,
    start_index: 0
  }

  found = TableExtractor.extract_tables(
    table_rows,
    regexp: @@table_separator_regexp,
    multi_line_delimiter: @@multi_line_delimiter,
    single_line_delimiter: @@single_line_delimiter
  )

  assert_equal [expected_table], found
end

#test_table_without_starting_pipe ⇒ Object



241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# File 'lib/table_extractor.rb', line 241

# The header row has no leading pipe, while the divider and body rows do.
# The expected table starts at the header (index 1): 4 rows of 3 columns.
def test_table_without_starting_pipe
  intro = ['Some introductory text.']
  table_rows = [
    'Platform| Target Environment| Command',
    '|-|-|-',
    '| Pongo tapanuliensis| Pongo| Hominidae',
    '| | Histiophryne| Antennariidae'
  ]
  markdown = intro + table_rows

  expected_table = {
    column_offset: 1,
    columns: 3,
    delimiter: @@multi_line_delimiter,
    rows: 4,
    start_index: 1
  }

  found = TableExtractor.extract_tables(
    markdown,
    regexp: @@table_separator_regexp,
    multi_line_delimiter: @@multi_line_delimiter,
    single_line_delimiter: @@single_line_delimiter
  )

  assert_equal [expected_table], found
end