Class: ContentParser

Inherits:
Object
  • Object
show all
Includes:
Assert
Defined in:
lib/ribit/contentparser.rb

Instance Method Summary collapse

Methods included from Assert

assert, #assert, assert_nil, #assert_nil, #assert_not_nil, assert_not_nil, raise_exception

Constructor Details

#initialize(ribitData) ⇒ ContentParser

Returns a new instance of ContentParser.



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/ribit/contentparser.rb', line 76

# Builds a ContentParser for the given ribit data.
#
# Stores +ribitData+, leaves the working document unset (it is created by
# #parse) and instantiates one parser of each kind. #parse applies the
# parsers in exactly the order they are listed here.
def initialize( ribitData )
  @ribitData = ribitData
  @doc = nil

  # Order matters: the parsers are run one after another in this sequence.
  stages = [
    SectionParser,
    ListParser,
    UnformattedContentParser,
    ParagraphParser,
    BoldParser,
    ItalicParser,
    RibitLinkParser,
    DirectLinkParser,
    EscapeParser
  ]
  @parsers = stages.map { |stage| stage.new }
end

Instance Method Details

#handle_container(container, parser) ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/ribit/contentparser.rb', line 108

# Applies +parser+ to the children of +container+ and installs the resulting
# child list on the container via set_childs.
#
# * A text child is handed to parser.parse together with the current
#   document. If the parser returns a non-nil, non-empty collection, those
#   containers take the place of the text child; otherwise the child is kept.
# * A non-text child that has children is processed recursively with the
#   same parser and then kept.
# * Any other child is kept untouched.
#
# Returns whatever container.set_childs returns.
def handle_container( container, parser )
  rebuilt = []

  container.get_childs.each do |child|
    if child.text?
      produced = parser.parse( child.get_text, @doc )

      # Nothing produced => the text child stays where it was.
      if produced.nil? || produced.size <= 0
        rebuilt.push( child )
      else
        rebuilt.concat( produced )
      end
      next
    end

    if child.childs?
      # special case (hack!): there are no paragraphs inside a List although
      # the format may match. For the same reason lists are parsed before
      # paragraphs.
      inside_list = parser.instance_of?( ParagraphParser ) && child.instance_of?( ListContainer )
      handle_container( child, parser ) unless inside_list
    end

    rebuilt.push( child )
  end

  container.set_childs( rebuilt )
end

#parse(page) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/ribit/contentparser.rb', line 93

# Parses +page+ and returns the string form of the resulting document.
#
# Asserts that +page+ is not nil, creates a fresh ContentDoc from the stored
# ribit data and the page, then runs every configured parser, in order, over
# the document root. The document is kept in @doc so that handle_container
# can pass it on to the individual parsers.
def parse( page )
  assert_not_nil( page, 'The provided page is nil' )
  @doc = ContentDoc.new( @ribitData, page )

  # Each parser gets a full pass over the tree before the next one starts.
  @parsers.each { |stage| handle_container( @doc.get_root, stage ) }

  @doc.to_s
end