Class: Wikiscript::TableReader

Inherits:
Object
  • Object
show all
Defined in:
lib/wikiscript/table_reader.rb

Class Method Summary collapse

Class Method Details

.parse(txt) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/wikiscript/table_reader.rb', line 27

def self.parse( txt )
  tables = []     ## todo/check: allow multiple tables? why? why not?

  rows   = nil   ## note: assume first row is the headers row!!
  row    = nil   ## current row  ## note: same as rows[-1]

  inside_table = false

  txt.each_line do |line|
    line = line.strip

    break if line == '__END__'

    ## note:  allow/add comments
    ##   note: CANNOT allow inline (end-of-line) comments
    ##     would strip/break css colors eg.  bgcolor=#ffff44
    next if line.start_with?( '#' )   ## skip comments too
    next if line.empty?               ## skip empty lines for now


    ## note:  for the table format
    ##  see https://en.wikipedia.org/wiki/Help:Basic_table_markup

    if line.start_with?( '{|' )     ## start table
      inside_table = true
      rows = []
    elsif inside_table && line.start_with?( '|}' )  ## end table
      tables << rows
      rows   = nil
      row    = nil
      inside_table = false
    elsif inside_table && line.start_with?( '|-' )  ## row divider
       row = []
       rows << row
    elsif inside_table && line.start_with?( '!' )    ## header column
       values = line.sub( '!', '' ).strip.split( '!!' )
       ## note: |-  row divider is optional before header columns
       if rows.empty?
         row = []
         rows << row
       end
       ## add each value one-by-one for now (to keep (same) row reference)
       ##   note: also strip leading (optional) attributes
       values.each do |value|
         row <<  strip_emphases( strip_attributes( value.strip ))
       end
    elsif inside_table && line.start_with?( '|' )   ## table data
       values = line.sub( '|', '' ).strip.split( '||' )
       ## add each value one-by-one for now (to keep (same) row reference)
       values.each do |value|
         row <<  strip_emphases( strip_attributes( value.strip ))
       end
    elsif inside_table
      puts "!! ERROR !! unknown line type inside table:"
      puts line
      exit 1
    else
      puts "!! ERROR !! unknown line type outside (before or after) table:"
      puts line
      exit 1
    end
  end
  tables
end

.parse_table(txt) ⇒ Object

only allow single table



13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/wikiscript/table_reader.rb', line 13

def self.parse_table( txt )   ## only allow single table
  tables = parse( txt )

  if tables.size == 0
    puts "** !!! ERROR !!! no table found in text"
    exit 1
  elsif tables.size > 1
    puts "** !!! ERROR !!! too many tables (#{tables.size}) found in text; only one expected/allowed; sorry"
    exit 1
  else
    tables[0]    ## pass-along first table; everything ok
  end
end

.read(path) ⇒ Object

use - rename to read_file or from_file etc. - why? why not?



7
8
9
10
# File 'lib/wikiscript/table_reader.rb', line 7

def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
  txt = File.open( path, 'r:utf-8' ).read
  parse( txt )
end

.strip_attributes(value) ⇒ Object

helper



94
95
96
97
98
99
100
# File 'lib/wikiscript/table_reader.rb', line 94

def self.strip_attributes( value )
  if value =~ /^[a-z]+=/                      ## if starts with 'attribute='
    value = value.sub( /[^|]+\|[ ]*/ , '' )   ## strip everything incl. pipe (|) and trailing spaces
  else
    value   ## return as-is (pass-through)
  end
end

.strip_emphases(value) ⇒ Object

strip bold or emphasis; note: emphases plural of emphasis



102
103
104
105
# File 'lib/wikiscript/table_reader.rb', line 102

def self.strip_emphases( value )   ## strip bold or emphasis; note: emphases plural of emphasis
  value = value.gsub( /'{2,}/, '' ).strip   ## remove two or more quotes e.g. '' or ''' etc.
  value
end