Class: TestTableExtractor
- Defined in:
- lib/table_extractor.rb
Constant Summary collapse
- @@table_separator_regexp =
Regex pattern to match table separator rows with optional colons and hyphens
/^[ \t]*\|? *(?::?-+:?) *( *\| *(?::?-+:?) *)*\|? *$/
- @@multi_line_delimiter =
'|'
- @@single_line_delimiter =
'!'
Instance Method Summary collapse
- #test_inconsistent_columns ⇒ Object
- #test_indented_table ⇒ Object
- #test_multiple_tables ⇒ Object
- #test_no_tables ⇒ Object
- #test_single_table ⇒ Object
- #test_table_at_end_of_lines ⇒ Object
- #test_table_with_colon_hyphens ⇒ Object
- #test_table_without_starting_pipe ⇒ Object
Instance Method Details
#test_inconsistent_columns ⇒ Object
193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 |
# File 'lib/table_extractor.rb', line 193
# Two tables whose header rows and divider rows disagree on the number of
# cells: the first has a 3-cell header over a 2-cell divider, the second a
# 2-cell header over a 3-cell divider.
def test_inconsistent_columns
  source_lines = [
    '| Species| Genus| Family',
    '|-|-',
    '| Pongo tapanuliensis| Pongo| Hominidae',
    '| | Histiophryne| Antennariidae',
    '',
    '| Name| Species',
    '|-|-|-',
    '| Tapanuli Orangutan| Pongo tapanuliensis'
  ]

  # Number of columns determined from row of dividers
  first_table = {
    column_offset: 1,
    columns: 2,
    delimiter: @@multi_line_delimiter,
    rows: 4,
    start_index: 0
  }
  second_table = {
    column_offset: 1,
    columns: 3,
    delimiter: @@multi_line_delimiter,
    rows: 3,
    start_index: 5
  }

  found_tables = TableExtractor.extract_tables(
    source_lines,
    regexp: @@table_separator_regexp,
    multi_line_delimiter: @@multi_line_delimiter,
    single_line_delimiter: @@single_line_delimiter
  )

  assert_equal [first_table, second_table], found_tables
end
#test_indented_table ⇒ Object
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
# File 'lib/table_extractor.rb', line 136
# A single table whose every row is prefixed with a tab and a space; it is
# expected to be reported like an unindented table starting at index 0.
def test_indented_table
  source_lines = [
    "\t | Species| Genus| Family",
    "\t |-|-|-",
    "\t | Pongo tapanuliensis| Pongo| Hominidae",
    "\t | | Histiophryne| Antennariidae"
  ]

  only_table = {
    column_offset: 1,
    columns: 3,
    delimiter: @@multi_line_delimiter,
    rows: 4,
    start_index: 0
  }

  found_tables = TableExtractor.extract_tables(
    source_lines,
    regexp: @@table_separator_regexp,
    multi_line_delimiter: @@multi_line_delimiter,
    single_line_delimiter: @@single_line_delimiter
  )

  assert_equal [only_table], found_tables
end
#test_multiple_tables ⇒ Object
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
# File 'lib/table_extractor.rb', line 154
# Two well-formed tables separated by an empty line; each is expected to be
# reported with its own start index, row count and column count.
def test_multiple_tables
  source_lines = [
    '| Species| Genus| Family',
    '|-|-|-',
    '| Pongo tapanuliensis| Pongo| Hominidae',
    '| | Histiophryne| Antennariidae',
    '',
    '| Name| Species',
    '|-|-',
    '| Tapanuli Orangutan| Pongo tapanuliensis'
  ]

  first_table = {
    column_offset: 1,
    columns: 3,
    delimiter: @@multi_line_delimiter,
    rows: 4,
    start_index: 0
  }
  second_table = {
    column_offset: 1,
    columns: 2,
    delimiter: @@multi_line_delimiter,
    rows: 3,
    start_index: 5
  }

  found_tables = TableExtractor.extract_tables(
    source_lines,
    regexp: @@table_separator_regexp,
    multi_line_delimiter: @@multi_line_delimiter,
    single_line_delimiter: @@single_line_delimiter
  )

  assert_equal [first_table, second_table], found_tables
end
#test_no_tables ⇒ Object
180 181 182 183 184 185 186 187 188 189 190 191 |
# File 'lib/table_extractor.rb', line 180
# Plain text with no divider row is expected to yield an empty result.
# Only the separator regexp is passed here; the delimiter keywords are
# deliberately left out, exactly as in the original call.
def test_no_tables
  source_lines = [
    'This is a regular line.',
    'Another regular line.'
  ]

  found_tables = TableExtractor.extract_tables(
    source_lines,
    regexp: @@table_separator_regexp
  )

  assert_equal [], found_tables
end
#test_single_table ⇒ Object
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
# File 'lib/table_extractor.rb', line 118
# The baseline case: one 3-column table (header, divider, two body rows)
# that makes up the entire input.
def test_single_table
  source_lines = [
    '| Species| Genus| Family',
    '|-|-|-',
    '| Pongo tapanuliensis| Pongo| Hominidae',
    '| | Histiophryne| Antennariidae'
  ]

  only_table = {
    column_offset: 1,
    columns: 3,
    delimiter: @@multi_line_delimiter,
    rows: 4,
    start_index: 0
  }

  found_tables = TableExtractor.extract_tables(
    source_lines,
    regexp: @@table_separator_regexp,
    multi_line_delimiter: @@multi_line_delimiter,
    single_line_delimiter: @@single_line_delimiter
  )

  assert_equal [only_table], found_tables
end
#test_table_at_end_of_lines ⇒ Object
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 |
# File 'lib/table_extractor.rb', line 220
# A table that follows one line of ordinary text and runs to the last input
# line; it is expected to be reported with start_index 1.
def test_table_at_end_of_lines
  source_lines = [
    'Some introductory text.',
    '| Species| Genus| Family',
    '|-|-|-',
    '| Pongo tapanuliensis| Pongo| Hominidae',
    '| | Histiophryne| Antennariidae'
  ]

  only_table = {
    column_offset: 1,
    columns: 3,
    delimiter: @@multi_line_delimiter,
    rows: 4,
    start_index: 1
  }

  found_tables = TableExtractor.extract_tables(
    source_lines,
    regexp: @@table_separator_regexp,
    multi_line_delimiter: @@multi_line_delimiter,
    single_line_delimiter: @@single_line_delimiter
  )

  assert_equal [only_table], found_tables
end
#test_table_with_colon_hyphens ⇒ Object
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 |
# File 'lib/table_extractor.rb', line 262
# A table whose divider row uses colon-decorated cells (':-:' and ':-');
# it is expected to be recognised as an ordinary 3-column table.
def test_table_with_colon_hyphens
  source_lines = [
    '| Name| Age| City',
    '|:-:|:-|:-:',
    '| John Doe| 30| New York',
    '| Jane Doe| 25| Los Angeles'
  ]

  only_table = {
    column_offset: 1,
    columns: 3,
    delimiter: @@multi_line_delimiter,
    rows: 4,
    start_index: 0
  }

  found_tables = TableExtractor.extract_tables(
    source_lines,
    regexp: @@table_separator_regexp,
    multi_line_delimiter: @@multi_line_delimiter,
    single_line_delimiter: @@single_line_delimiter
  )

  assert_equal [only_table], found_tables
end
#test_table_without_starting_pipe ⇒ Object
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 |
# File 'lib/table_extractor.rb', line 241
# A table whose header row has no leading pipe, while the divider and body
# rows do; it is still expected to be reported as one 4-row table starting
# at the header (index 1).
def test_table_without_starting_pipe
  source_lines = [
    'Some introductory text.',
    'Platform| Target Environment| Command',
    '|-|-|-',
    '| Pongo tapanuliensis| Pongo| Hominidae',
    '| | Histiophryne| Antennariidae'
  ]

  only_table = {
    column_offset: 1,
    columns: 3,
    delimiter: @@multi_line_delimiter,
    rows: 4,
    start_index: 1
  }

  found_tables = TableExtractor.extract_tables(
    source_lines,
    regexp: @@table_separator_regexp,
    multi_line_delimiter: @@multi_line_delimiter,
    single_line_delimiter: @@single_line_delimiter
  )

  assert_equal [only_table], found_tables
end