Class: SportDb::OutlineReader

Inherits:
Object
  • Object
show all
Defined in:
lib/sportdb/formats/outline_reader.rb

Constant Summary collapse

HEADING_BLANK_RE =

note: skip “decorative”-only heading lines (made up of = characters only), e.g. ========

todo/check:  find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
%r{\A
={1,}
\z}x
HEADING_RE =

note: like in wikimedia markup (and markdown), allow optional trailing ==== too

%r{\A
(?<marker>={1,})       ## 1. leading ======
  [ ]*
(?<text>[^=]+)         ## 2. text   (note: for now no "inline" = allowed)
  [ ]*
  =*                   ## 3. (optional) trailing ====
\z}x

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(txt) ⇒ OutlineReader

Returns a new instance of OutlineReader.



16
17
18
# File 'lib/sportdb/formats/outline_reader.rb', line 16

## Keeps the text to parse for a later call to #parse.
##
## txt - the outline text; #parse calls each_line on it
def initialize(txt)
  @txt = txt
end

Class Method Details

.parse(txt) ⇒ Object



12
13
14
# File 'lib/sportdb/formats/outline_reader.rb', line 12

## Convenience shortcut - builds a new reader for txt
## and returns the result of calling #parse on it.
def self.parse(txt)
  reader = new(txt)
  reader.parse
end

.read(path) ⇒ Object

todo/check: rename to read_file or from_file etc. - why? why not?



7
8
9
10
# File 'lib/sportdb/formats/outline_reader.rb', line 7

## Reads the file at path (as utf-8 text) and hands the content on to parse.
##
## todo/check: rename to read_file or from_file etc. - why? why not?
def self.read(path)
  parse(File.read(path, mode: 'r:utf-8'))
end

Instance Method Details

#parse ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/sportdb/formats/outline_reader.rb', line 35

##
## Parses the text passed to the constructor into a flat outline, that is,
## an array of nodes in document order. Every node is a two-element array:
##
##   [:h1, 'Heading']            - heading; the level (h1, h2, ...) is the
##                                 number of leading = characters
##   [:p, ['line 1', 'line 2']]  - paragraph; a run of consecutive text lines
##
## Rules (every line gets stripped of leading and trailing whitespace first):
##   - a blank line ends the current paragraph
##   - a line reading __END__ stops parsing; the rest of the text is ignored
##   - a line starting with # is skipped (comment)
##   - an inline comment (# until end-of-line) is cut off
##   - a "decorative"-only heading line e.g. ======== is skipped
##     (note: it does NOT end the current paragraph)
##   - a heading ends the current paragraph
##
## note: writes nothing to stdout and never exits the process;
##   the open paragraph is tracked directly, so there is no
##   "last node is not a paragraph" state to guard against.
##
## Returns the outline (array of nodes); empty if there are no headings or text lines.
def parse
  outline = []    ## outline structure
  para    = nil   ## lines of the open p(aragraph); nil => next text line starts a new one

  @txt.each_line do |line|
    line = line.strip      ## todo/fix: keep leading and trailing spaces - why? why not?

    if line.empty?    ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
      para = nil
      next
    end

    break if line == '__END__'

    next if line.start_with?( '#' )   ## skip comments too
    ## strip inline (until end-of-line) comments too
    ##  e.g Eupen | KAS Eupen ## [de]
    ##   => Eupen | KAS Eupen
    ##  e.g bq   Bonaire,  BOE        # CONCACAF
    ##   => bq   Bonaire,  BOE
    line = line.sub( /#.*/, '' ).strip

    ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
    next if HEADING_BLANK_RE.match( line )  # skip "decorative" only heading e.g. ========

    ## note: like in wikimedia markup (and markdown) allow optional trailing ==== too
    if (m = HEADING_RE.match( line ))
      para = nil
      heading_level = m[:marker].length   ## count number of = for heading level
      outline << [:"h#{heading_level}", m[:text].strip]
    elsif para
      para << line            ## add line to open p(aragraph)
    else                      ## assume it's a (plain/regular) text line starting a new p(aragraph)
      para = [line]
      outline << [:p, para]   ## note: node shares the para array, so later lines show up in the outline
    end
  end

  outline
end