Class: Feedtxt::IniParser

Inherits:
Object
  • Object
show all
Includes:
LogUtils::Logging
Defined in:
lib/feedtxt/parser/ini.rb

Constant Summary collapse

FEED_BEGIN =

note:

 regex escape for bracket: [ becomes \[
 \\ needs to get escaped twice e.g. (\\ becomes \)
e.g. [>>>  or [>>>>>
"^[ ]*\\[>>>+[ ]*$"
FEED_END =

e.g. <<<] or <<<<<<]

"^[ ]*<<<+\\][ ]*$"
FEED_NEXT =

e.g. </> or <<</>>>

"^[ ]*<+/>+[ ]*$"
FEED_META =

e.g. --- or -----

"^[ ]*---+[ ]*$"

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text) ⇒ IniParser

Note: let's keep/use the same API as RSS::Parser for now



17
18
19
# File 'lib/feedtxt/parser/ini.rb', line 17

# Wraps the raw feed text in a new parser; the text is stored untouched
# in @text, which is what the instance-level #parse scans.
#
# @param text [String] the feed document to parse (presumably the complete
#   text including the begin/end markers - confirm against callers)
def initialize(text)
  @text = text
end

Class Method Details

.parse(text, opts = {}) ⇒ Object

convenience class/factory method



12
13
14
# File 'lib/feedtxt/parser/ini.rb', line 12

# Convenience factory: builds a parser for the given text and runs it
# right away, returning whatever the instance-level parse returns.
#
# @param text [String] the feed document handed to the constructor
# @param opts [Hash] accepted for API compatibility; not used by this method
# @return [Object] result of the instance-level #parse
def self.parse( text, opts={} )
  parser = new( text )
  parser.parse
end

Instance Method Details

#parse ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/feedtxt/parser/ini.rb', line 38

# Parses the feed text held in @text.
#
# Layout that gets scanned for:
#   - everything before the begin marker (FEED_BEGIN e.g. [>>>) is skipped
#   - the body runs up to the end marker (FEED_END e.g. <<<])
#   - the body is split into blocks on FEED_NEXT lines (e.g. </>)
#   - the 1st block is the feed metadata; it is passed to ::INI.load
#   - every other block is an item: the text before the first FEED_META
#     line (e.g. ---) is passed to ::INI.load, the text after it is the
#     item content
#
# Prints a warning and returns an empty array if the begin or the end
# marker cannot be found.
#
# @return [Array] +[feed_metadata, feed_items]+ where feed_items is an array
#   of +[item_metadata, item_content]+ pairs; the metadata values are whatever
#   ::INI.load returns and item_content is a stripped String;
#   +[]+ if a marker is missing
def parse
  scanner = StringScanner.new( @text )

  ## skip the prolog, that is, everything before the begin marker;
  ##   allows three or more >>> and spaces before and after the marker
  scanner.scan_until( /(?=#{FEED_BEGIN})/ )

  ## note: scan/scan_until return nil (NOT an empty string) if nothing matches
  if scanner.scan( /#{FEED_BEGIN}/ ).nil?
    ## nothing found return empty array for now; return nil - why? why not?
    puts "warn !!! no begin marker found e.g. [>>>"
    return []
  end

  buf = scanner.scan_until( /(?=#{FEED_END})/ )
  if buf.nil? || scanner.scan( /#{FEED_END}/ ).nil?
    ## nothing found return empty array for now; return nil - why? why not?
    puts "warn !!! no end marker found e.g. <<<]"
    return []
  end

  ####
  ## pass 1: split blocks by </>
  ###    note: allows   <<<</>>>>
  blocks = buf.strip.split( /#{FEED_NEXT}/ )

  ## 1st block is the feed metadata;
  ##   note: an empty body yields no blocks at all, thus, fall back to ''
  feed_metadata = ::INI.load( blocks.shift.to_s.strip )

  feed_items = blocks.map do |block|
    ###   note: do NOT use split e.g. --- is used by markdown
    ##      only search for first --- to split (all others get ignored)
    item_scanner = StringScanner.new( block )

    ## NOTE(review): if the block has no --- line scan_until returns nil;
    ##   the block is then treated as content only (with empty metadata)
    ##   instead of crashing - confirm that this is the wanted reading
    item_text     = item_scanner.scan_until( /(?=#{FEED_META})/ ).to_s
    item_metadata = ::INI.load( item_text.strip )

    ## consume the --- line itself, so that rest starts with the content
    item_scanner.scan( /#{FEED_META}/ )

    [ item_metadata, item_scanner.rest.strip ]
  end

  [ feed_metadata, feed_items ]
end