Class: Dphil::TeiXML
- Inherits:
-
Object
- Object
- Dphil::TeiXML
- Defined in:
- lib/dphil/tei_xml.rb
Instance Method Summary collapse
-
#crop(expr) ⇒ Object
Public: Return a portion of the document as a new document.
- #crop_each(expr) ⇒ Object
- #empty? ⇒ Boolean
-
#initialize(source) ⇒ TeiXML
constructor
Public: Initialize a TeiXML object.
-
#reject(expr) ⇒ Object
Public: Remove elements from the document based on CSS selector.
-
#subst(expr, subst_text = nil) ⇒ Object
Public: Substitute elements from the document based on CSS selector with ID-based token text-nodes.
- #to_xml ⇒ Object (also: #to_s)
-
#xml ⇒ Object
Return or re-parse xml.
Constructor Details
#initialize(source) ⇒ TeiXML
Public: Initialize a TeiXML object
8 9 10 11 |
# File 'lib/dphil/tei_xml.rb', line 8

# Public: Initialize a TeiXML object.
#
# source - the raw XML source String. nil or a whitespace-only String is
#          replaced with an empty TEI root element.
#
# The source is only stored in @raw_xml here; nothing is parsed yet.
def initialize(source)
  # Guard nil before calling #strip so a missing source behaves like a blank one.
  blank = source.nil? || source.strip.empty?
  source = %(<TEI version="5.0" xmlns="http://www.tei-c.org/ns/1.0"></TEI>) if blank
  @raw_xml = source
end
Instance Method Details
#crop(expr) ⇒ Object
Public: Return a portion of the document as a new document
expr - a CSS selector or XPath expression
Returns a new document.
40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/dphil/tei_xml.rb', line 40

# Public: Return a portion of the document as a new document.
#
# expr - a CSS selector or XPath expression
#
# Returns a new document.
def crop(expr)
  matches = xml.search(expr)
  page_break = page_of(matches)
  line_break = line_of(matches)

  # The &. calls guard against page_of/line_of returning nil.
  wrapped = <<~XML
    <TEI version="5.0" xmlns="http://www.tei-c.org/ns/1.0">
      <pre>#{page_break&.to_xml}#{line_break&.to_xml}</pre>
      #{matches.to_xml}
      <post></post>
    </TEI>
  XML
  self.class.new(wrapped)
end
#crop_each(expr) ⇒ Object
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/dphil/tei_xml.rb', line 55

# Public: Build a separate new document for every node matching expr.
#
# expr - a CSS selector or XPath expression
#
# Returns one new document per matched node.
def crop_each(expr)
  xml.search(expr).map do |match|
    page_break = page_of(match)
    line_break = line_of(match)

    # The &. calls guard against page_of/line_of returning nil.
    self.class.new(<<~XML)
      <TEI version="5.0" xmlns="http://www.tei-c.org/ns/1.0">
        <pre>#{page_break&.to_xml}#{line_break&.to_xml}</pre>
        #{match.to_xml}
        <post></post>
      </TEI>
    XML
  end
end
#empty? ⇒ Boolean
31 32 33 |
# File 'lib/dphil/tei_xml.rb', line 31

# Public: Check whether the document has no non-whitespace text.
#
# Returns true when no text node with non-blank content is found.
def empty?
  non_blank_text = xml.xpath("//text()[normalize-space()]")
  non_blank_text.empty?
end
#reject(expr) ⇒ Object
Public: Remove elements matching a CSS selector or XPath expression from the document, keeping any pb/lb elements nested inside them.
expr - a CSS selector or XPath expression
Returns a new document.
76 77 78 79 80 81 82 |
# File 'lib/dphil/tei_xml.rb', line 76

# Public: Remove elements from the document based on a selector.
#
# expr - a CSS selector or XPath expression
#
# Returns a new document.
def reject(expr)
  pruned = xml.dup
  # Each match is replaced by the pb/lb elements found inside it, so those
  # markers stay in the output while the rest of the match is dropped.
  pruned.search(expr).each { |match| match.replace(match.search("pb, lb")) }
  self.class.new(pruned.to_xml)
end
#subst(expr, subst_text = nil) ⇒ Object
Public: Substitute elements from the document based on CSS selector with
ID-based token text-nodes.
expr - a CSS selector or XPath expression
subst_text - an optional text identifier
Returns a new document.
91 92 93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/dphil/tei_xml.rb', line 91

# Public: Substitute elements from the document based on a selector with
# ID-based token text-nodes.
#
# expr       - a CSS selector or XPath expression
# subst_text - an optional text identifier; whitespace runs become "_".
#              When nil, each matched node's own name is used instead.
#
# Each match is replaced by a text node of the form " {{label:id}} "
# followed by the pb/lb elements found inside the match.
#
# Returns a new document.
def subst(expr, subst_text = nil)
  # Work on a copy of the parsed document obtained through #xml, the same
  # accessor #reject uses.
  source = xml.dup
  subst_text = subst_text.to_s.gsub(/\s+/, "_") unless subst_text.nil?

  source.search(expr).each do |node|
    node_id = node.attribute("id").to_s.gsub(/\s+/, "_")
    token = " {{#{subst_text || node.name}:#{node_id}}} "

    replacement = Nokogiri::XML::NodeSet.new(source)
    replacement << Nokogiri::XML::Text.new(token, source)
    node.replace(replacement + node.search("pb, lb"))
  end

  self.class.new(source.to_xml)
end
#to_xml ⇒ Object Also known as: to_s
25 26 27 |
# File 'lib/dphil/tei_xml.rb', line 25

# Public: Serialize the document.
#
# Returns whatever #to_xml on the parsed document (see #xml) returns.
def to_xml
  document = xml
  document.to_xml
end
#xml ⇒ Object
Return the cached parsed XML document, parsing the raw source first if no parsed document is cached.
14 15 16 17 18 19 20 21 22 23 |
# File 'lib/dphil/tei_xml.rb', line 14

# Public: Return the parsed document, parsing @raw_xml when nothing is cached.
#
# The parse is strict, the encoding is set to UTF-8, namespaces are removed,
# and the result of xml_normalize! is cached in @xml.
#
# Returns the cached value of @xml.
# Raises RuntimeError when Nokogiri reports a syntax error.
def xml
  return @xml if @xml

  # NOTE(review): noent enables entity substitution - confirm sources are trusted.
  doc = Nokogiri::XML(@raw_xml) { |config| config.strict.noent }
  doc.encoding = "UTF-8"
  doc.remove_namespaces!
  @xml = xml_normalize!(doc)
rescue Nokogiri::XML::SyntaxError => e
  raise "TEIDocument (source: #{@raw_xml}) caught exception: #{e}"
end