Class: XHTMLDocument

Inherits:
Object
  • Object
show all
Defined in:
lib/ribit/xhtmldocument.rb

Overview

Class for modifying an XHTML document.

Constant Summary collapse

ID =

Attribute name that marks modifiable elements

'aid'
HEAD_ELE =

Name of head element

'head'

Instance Method Summary collapse

Constructor Details

#initialize(xhtmlData) ⇒ XHTMLDocument

xhtmlData is the XHTML document content in string format.



31
32
33
34
35
36
37
38
39
40
41
# File 'lib/ribit/xhtmldocument.rb', line 31

# Parses the given XHTML data and stores the resulting REXML document.
#
# xhtmlData:: the XHTML content as a string
#
# Raises XHTMLException when the parsed document has no <html> root element.
def initialize( xhtmlData )
	# the parser raises an error if the data is in an invalid format
	@doc = REXML::Document.new( xhtmlData )

	# the document is only accepted when an <html> root element exists
	if @doc.elements['html'].nil?
		message = 'No proper root element <html> for the document:' + xhtmlData
		raise XHTMLException.new, message
	end

	@logger = RibitLogger.new( XHTMLDocument )
end

Instance Method Details

#<<(data) ⇒ Object

Stream operator for putting the header elements like xml and DOCTYPE



140
141
142
# File 'lib/ribit/xhtmldocument.rb', line 140

# Stream operator: appends data (e.g. an XML declaration or a DOCTYPE)
# to the underlying REXML document.
#
# data:: the node to add to the document
def <<( data )
	# REXML::Document#<< is an alias of REXML::Document#add
	@doc.add( data )
end

#add_metadata(element) ⇒ Object

Adds an element to the head element. If the head element does not exist, it is created first.



103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/ribit/xhtmldocument.rb', line 103

# Adds the given element to the document's head element. When the root
# element has no head child yet, one is created and placed before the
# root's first child node (or simply added when the root is empty).
#
# element:: the element to add under the head element
#
# Returns whatever head.add_element returns for the given element.
def add_metadata( element )
	root = @doc.root
	head = root.elements[HEAD_ELE]

	if head.nil?
		head = REXML::Element.new( HEAD_ELE )
		first_child = root[0]
		if first_child.nil?
			# empty root: the new head becomes its only child
			root.add_element( head )
		else
			# keep head as the first node of the root
			root.insert_before( first_child, head )
		end
	end

	head.add_element( element )
end

#elements_by_id(id) ⇒ Object

Returns all elements whose #ID attribute equals the given id.



78
79
80
# File 'lib/ribit/xhtmldocument.rb', line 78

# Returns an array of all elements in the document whose ID attribute
# has the given value.
#
# id:: the wanted attribute value; it is interpolated into the XPath
#      expression inside single quotes as-is
def elements_by_id( id )
	# a location step needs a node test, so '*' (any element) is given
	# explicitly before the attribute predicate
	REXML::XPath.match( @doc.root, "//*[@#{ID}='#{id}']" )
end

#elements_by_type(type) ⇒ Object

Returns all elements of the wanted type that have an #ID attribute.



84
85
86
# File 'lib/ribit/xhtmldocument.rb', line 84

# Returns an array of all elements of the given type (element name)
# that carry the ID attribute.
#
# type:: element name, interpolated into the XPath expression as-is
def elements_by_type( type )
	xpath = "//#{type}[@#{ID}]"
	REXML::XPath.match( @doc.root, xpath )
end

#get_all_tagged_elements ⇒ Object

Returns all elements that have the #ID attribute.



125
126
127
# File 'lib/ribit/xhtmldocument.rb', line 125

# Returns an array of all elements in the document that have the ID
# attribute, whatever its value.
def get_all_tagged_elements
	# a location step needs a node test, so '*' (any element) is given
	# explicitly before the attribute predicate
	REXML::XPath.match( @doc.root, "//*[@#{ID}]" )
end

#get_ids ⇒ Object

Returns all distinct values of the #ID attribute.



95
96
97
98
99
100
# File 'lib/ribit/xhtmldocument.rb', line 95

# Returns the distinct values of the ID attribute found on the tagged
# elements, in the order the elements are returned by
# get_all_tagged_elements.
def get_ids
	# map every tagged element to its attribute value, then drop duplicates
	get_all_tagged_elements.map { |tagged| tagged.attributes[ID] }.uniq
end

#insert_text(id, text) ⇒ Object

Replaces (or inserts) the text of all elements that have the attribute #ID=#id



89
90
91
92
# File 'lib/ribit/xhtmldocument.rb', line 89

# Sets the text of every element returned by elements_by_id for the
# given id, after writing a debug entry to the logger.
#
# id::   value of the ID attribute selecting the target elements
# text:: the text to set on each matching element
def insert_text( id, text )
	@logger.debug( "Inserting text ID=#{id}, html=#{text}" )

	targets = elements_by_id( id )
	targets.each do |target|
		target.text = text
	end
end

#process_marked_elements ⇒ Object

Goes through all marked elements and runs the given block of code



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/ribit/xhtmldocument.rb', line 44

# Walks the element tree starting from the root and yields every element
# that carries the ID attribute to the given block.
#
# Yields the attribute value and the element. The attribute is removed
# from the element before the block is called.
#
# Elements are taken from the end of a work list, so the children of an
# element are visited last child first. The children of an element that
# the block detached from the document are not visited.
def process_marked_elements
  pending = []
  pending.push( @doc.root ) unless @doc.root.nil?

  until pending.empty?
    # take the last element from the list
    element = pending.pop

    id = element.attributes[ID]
    unless id.nil?
      # remove the marker before the element is processed
      element.attributes[ID] = nil
      yield( id, element )
    end

    # children are queued only if the element still remains in the
    # document, i.e. it was not removed; the other case is the root element
    still_present = !element.parent.nil? || element == @doc.root
    next unless still_present && element.has_elements?

    # queue only the child elements, skipping other child nodes
    pending.concat( element.to_a.select { |child| child.kind_of?( REXML::Element ) } )
  end
end

#remove_all_ids ⇒ Object

Removes all #ID attributes



120
121
122
# File 'lib/ribit/xhtmldocument.rb', line 120

# Deletes the ID attribute from every element returned by
# get_all_tagged_elements.
def remove_all_ids
	tagged = get_all_tagged_elements
	tagged.each do |element|
		element.delete_attribute( ID )
	end
end

#remove_elements(id) ⇒ Object

Removes from the document all elements that are tagged with the given id



71
72
73
74
75
# File 'lib/ribit/xhtmldocument.rb', line 71

# Detaches from the document every element returned by elements_by_id
# for the given id.
#
# id:: value of the ID attribute selecting the elements to remove
def remove_elements( id )
	elements_by_id( id ).each( &:remove )
end

#to_s ⇒ Object

Outputs the current document to String and returns it.



130
131
132
133
134
135
136
137
# File 'lib/ribit/xhtmldocument.rb', line 130

# Writes the current document into a String and returns it.
#
# Leading and trailing whitespace of the written output is stripped and
# exactly one line feed is appended at the end.
def to_s
	buffer = ""
	@doc.write( buffer, 0 )

	# strip removes too much from the end, so one line feed is added back
	"#{buffer.strip}\n"
end