Class: XHTMLDocument

Inherits:

Object

Object
XHTMLDocument

show all

Defined in: lib/ribit/xhtmldocument.rb

Overview

Class for modifying XHTML document

Constant Summary collapse

ID = Attribute name that marks modifiable elements

'aid'

HEAD_ELE = Name of head element

'head'

Instance Method Summary collapse

#<<(data) ⇒ Object

Stream operator for putting the header elements like xml and DOCTYPE.
#add_metadata(element) ⇒ Object

Adds an element to the head element.
#elements_by_id(id) ⇒ Object

Returns all elements having #ID=“#id”.
#elements_by_type(type) ⇒ Object

Returns all elements that have the #ID attribute and whose element type matches the given type.
#get_all_tagged_elements ⇒ Object

Returns all elements that have the #ID attribute.
#get_ids ⇒ Object

Returns the distinct values of the #ID attribute.
#initialize(xhtmlData) ⇒ XHTMLDocument constructor

xhtmlData is data in string format.
#insert_text(id, text) ⇒ Object

Replaces (or inserts) the text of all elements that have the attribute #ID=#id.
#process_marked_elements ⇒ Object

Goes through all marked elements and runs the given block of code.
#remove_all_ids ⇒ Object

Removes all #ID attributes.
#remove_elements(id) ⇒ Object

Removes from the document those elements that are tagged with the given id.
#to_s ⇒ Object

Outputs the current document to String and returns it.

Constructor Details

#initialize(xhtmlData) ⇒ `XHTMLDocument`

xhtmlData is data in string format

# File 'lib/ribit/xhtmldocument.rb', line 31

# Parses the given XHTML text and keeps the resulting REXML document.
#
# xhtmlData:: the document source; it is concatenated onto the error
#             message below, so it is expected to be a String
#
# Raises XHTMLException when the parsed document has no <html> element at
# its top level.
def initialize( xhtmlData )
  # parsing raises an error if the data has an invalid format
  @doc = REXML::Document.new( xhtmlData )

  # the document must have <html> as its root element
  if @doc.elements['html'].nil?
    message = 'No proper root element <html> for the document:' + xhtmlData
    raise XHTMLException.new, message
  end

  @logger = RibitLogger.new( XHTMLDocument )
end

Instance Method Details

#<<(data) ⇒ `Object`

Stream operator for putting the header elements like xml and DOCTYPE



140
141
142

# File 'lib/ribit/xhtmldocument.rb', line 140

# Stream operator: forwards +data+ (for example an XML declaration or a
# DOCTYPE) to the underlying document's own << and returns its result.
def <<(data)
  @doc << data
end

#add_metadata(element) ⇒ `Object`

Adds an element to the head element. If the head element does not exist, it is created.

# File 'lib/ribit/xhtmldocument.rb', line 103

# Appends +element+ to the head element of the document.
#
# When the root has no head child, a new head element is created first: it
# is inserted before the root's current first child, or simply added when
# the root has no children at all.
def add_metadata( element )
  root = @doc.root
  head = root.elements[HEAD_ELE]

  if head.nil?
    head = REXML::Element.new( HEAD_ELE )
    first_child = root[0]
    if first_child.nil?
      root.add_element( head )
    else
      # keep the new head ahead of whatever the root already contains
      root.insert_before( first_child, head )
    end
  end

  head.add_element( element )
end

#elements_by_id(id) ⇒ `Object`

Returns all elements having #ID=“#id”



78
79
80

# File 'lib/ribit/xhtmldocument.rb', line 78

# Returns an array of every element whose ID attribute equals +id+.
#
# The path uses an explicit "*" node test: XPath 1.0 requires a node test
# in every location step, so a bare "//[...]" is not a valid expression
# and is not guaranteed to be accepted by every REXML version.
#
# NOTE(review): +id+ is interpolated into a single-quoted XPath literal, so
# a value containing "'" produces a malformed expression -- confirm that
# callers only pass plain identifiers.
def elements_by_id( id )
  REXML::XPath.match( @doc.root, "//*[@#{ID}='#{id}']" )
end

#elements_by_type(type) ⇒ `Object`

Returns all elements that have the #ID attribute and whose element type matches the given type.



84
85
86

# File 'lib/ribit/xhtmldocument.rb', line 84

# Returns an array of the elements named +type+ that carry the ID attribute,
# searched over the whole document.
def elements_by_type( type )
  path = "//#{type}[@#{ID}]"
  REXML::XPath.match( @doc.root, path )
end

#get_all_tagged_elements ⇒ `Object`

Returns all elements that have the #ID attribute.



125
126
127

# File 'lib/ribit/xhtmldocument.rb', line 125

# Returns an array of every element in the document that has the ID
# attribute, whatever its value.
#
# The path uses an explicit "*" node test: XPath 1.0 requires a node test
# in every location step, so a bare "//[...]" is not a valid expression
# and is not guaranteed to be accepted by every REXML version.
def get_all_tagged_elements
  REXML::XPath.match( @doc.root, "//*[@#{ID}]" )
end

#get_ids ⇒ `Object`

Returns the distinct values of the #ID attribute

# File 'lib/ribit/xhtmldocument.rb', line 95

# Returns the distinct ID attribute values found on the tagged elements,
# in order of first appearance.
def get_ids
  tagged = get_all_tagged_elements
  # swap every element in the array for the value of its ID attribute
  tagged.map! { |element| element.attributes[ID] }
  # drop repeated values
  tagged.uniq
end

#insert_text(id, text) ⇒ `Object`

Replaces (or inserts) the text of all elements that have the attribute #ID=#id

# File 'lib/ribit/xhtmldocument.rb', line 89

# Sets +text+ as the text of every element returned by elements_by_id for
# +id+, after writing a debug line to the logger. Returns that array of
# elements.
def insert_text( id, text )
  @logger.debug( "Inserting text ID=#{id}, html=#{text}" )

  elements_by_id( id ).each do |element|
    element.text = text
  end
end

#process_marked_elements ⇒ `Object`

Goes through all marked elements and runs the given block of code

# File 'lib/ribit/xhtmldocument.rb', line 44

# Walks the element tree and yields every element that carries the ID
# attribute.
#
# Yields +id+ (the attribute's value) and +element+. The attribute is
# removed from the element before the block runs. Elements are taken from
# the end of a work list, so the children of an element are visited
# last-to-first. When the block detaches an element from the tree, that
# element's children are not visited.
#
# A block is required: the method yields unconditionally for marked elements.
def process_marked_elements
  pending = []
  pending.push( @doc.root ) unless @doc.root.nil?

  until pending.empty?
    # take the last element from the work list
    element = pending.pop

    id = element.attributes[ID]
    unless id.nil?
      # drop the marker before handing the element to the block
      element.attributes[ID] = nil
      yield( id, element )
    end

    # Descend only while the element is still attached to a parent (the
    # block may have removed it) or is the document root.
    still_in_tree = !element.parent.nil? || element == @doc.root
    next unless still_in_tree && element.has_elements?

    # queue the child elements only; text and other node kinds are skipped
    pending.concat( element.to_a.grep( REXML::Element ) )
  end
end

#remove_all_ids ⇒ `Object`

Removes all #ID attributes



120
121
122

# File 'lib/ribit/xhtmldocument.rb', line 120

# Deletes the ID attribute from every tagged element and returns the array
# of those elements.
def remove_all_ids
  get_all_tagged_elements.each do |element|
    element.delete_attribute( ID )
  end
end

#remove_elements(id) ⇒ `Object`

Removes from the document those elements that are tagged with the given id

# File 'lib/ribit/xhtmldocument.rb', line 71

# Calls +remove+ on every element returned by elements_by_id for +id+ and
# returns the array of those elements.
def remove_elements( id )
  elements_by_id( id ).each( &:remove )
end

#to_s ⇒ `Object`

Outputs the current document to String and returns it.

# File 'lib/ribit/xhtmldocument.rb', line 130

# Writes the current document into a String and returns it.
#
# Leading and trailing whitespace of the written output is stripped, and a
# single line feed is then put back at the end.
def to_s
  buffer = ''
  # second argument is the indent passed on to the document writer
  @doc.write( buffer, 0 )
  "#{buffer.strip}\n"
end

Class: XHTMLDocument

Overview

Constant Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(xhtmlData) ⇒ XHTMLDocument

Instance Method Details

#<<(data) ⇒ Object

#add_metadata(element) ⇒ Object

#elements_by_id(id) ⇒ Object

#elements_by_type(type) ⇒ Object

#get_all_tagged_elements ⇒ Object

#get_ids ⇒ Object

#insert_text(id, text) ⇒ Object

#process_marked_elements ⇒ Object

#remove_all_ids ⇒ Object

#remove_elements(id) ⇒ Object

#to_s ⇒ Object