Class: Splitter_rss

Inherits:
Object
  • Object
show all
Includes:
Splitter
Defined in:
lib/abelard/load.rb

Constant Summary collapse

NS =
['wp:http://wordpress.org/export/1.2/']

Instance Method Summary collapse

Methods included from Splitter

#item, #write_doc_clean, #write_item

Constructor Details

#initialize(document, destination) ⇒ Splitter_rss

Returns a new instance of Splitter_rss.



40
41
42
43
# File 'lib/abelard/load.rb', line 40

# Builds a splitter for one parsed feed document.
#
# @param document the parsed XML document; kept in @doc, whose root
#   element is what split_items walks
# @param destination output location; kept in @dest and used as the
#   prefix of every path written ("#{@dest}/...")
def initialize(document, destination)
  @doc, @dest = document, destination
end

Instance Method Details

#extract_comments(item) ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/abelard/load.rb', line 45

# Pulls the comments out of a post item.
#
# In a WordPress export the comments are wp:comment elements nested inside
# the post's item. Each wp:comment is removed from +item+ and becomes the
# root of its own document. The author e-mail and author IP elements are
# removed from every such document. Only comments whose
# wp:comment_approved content is '1' are returned.
#
# @param item the post's item node; its wp:comment children are removed
# @return [Array] one Item per approved comment, each targeting
#   "#{@dest}/<name from Post_id_rss#as_comment>"
def extract_comments(item)
  post_id = Post_id_rss.new(item)

  item.find("wp:comment", NS).each_with_object([]) do |comment_node, approved_items|
    doc = LibXML::XML::Document.new
    doc.root = comment_node.remove!

    # Look up every field of interest before pruning anything.
    approval = doc.find_first("/wp:comment/wp:comment_approved", NS)
    email = doc.find_first("/wp:comment/wp:comment_author_email", NS)
    ip = doc.find_first("/wp:comment/wp:comment_author_IP", NS)
    comment_id = doc.find_first("/wp:comment/wp:comment_id", NS)

    # Sensitive fields are dropped whether or not the comment is kept.
    [email, ip].compact.each(&:remove!)

    next unless approval && approval.content == '1'

    approved_items << Item.new(doc, "#{@dest}/#{post_id.as_comment(comment_id.content)}")
  end
end

#save(node) ⇒ Object



119
120
121
122
123
124
125
126
127
# File 'lib/abelard/load.rb', line 119

# Writes one post to "#{@dest}/<post id>", but only if it is published.
#
# The file name comes from Post_id_rss#to_s for the node. Posts whose
# status is anything other than :published are not saved; a notice naming
# the file and the status goes to $stderr instead.
#
# @param node the item node for the post
# @return whatever the item's #save returns, or nil when the post is skipped
def save(node)
  filename = Post_id_rss.new(node).to_s
  post = item(node, "#{@dest}/#{filename}")
  return post.save if post.status == :published

  $stderr.puts("skipping #{filename} as status #{post.status}")
end

#split_items ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/abelard/load.rb', line 72

# Splits the feed in @doc into separate files under @dest.
#
# Files written:
# * one file per published post, via #save
# * one file per approved comment, via the Items from #extract_comments
# * "channel-N.xml" for each channel whose self link does not match
#   /comments/; it holds the channel's non-item children
# * "feed.xml", the @parent document: the root element with its
#   attributes, a shallow copy of each channel, and any non-channel
#   children of the root
#
# Items that contain a wp:attachment_url element are not saved as posts.
# The name of every channel child element is echoed to $stderr as it is
# visited.
def split_items
  channel_count = 1
  rss = @doc.root
  @parent = LibXML::XML::Document.new()
  root = LibXML::XML::Node.new(rss.name)
  @parent.root = root
  rss.attributes.each { |a| root.attributes[a.name] = a.value }
  rss.children.select(&:element?).each do |channel|
    if channel.name == "channel"
      root << channel.clone # shallow copy for feed.xml

      # NOTE(review): presumably the URL of the channel's rel="self" link;
      # confirm against XmlUtil.
      channelself = XmlUtil::self_link(channel)
      is_comments = (channelself =~ /comments/)

      copy = LibXML::XML::Node.new(channel.name)
      channel.attributes.each { |a| copy.attributes[a.name] = a.value }
      channel.children.select(&:element?).each do |node|
        $stderr.puts(node.name)
        if node.name == "item"
          # attachments don't get saved as posts
          # Uses the shared NS constant, as extract_comments does, rather
          # than repeating the namespace string here.
          if node.find("wp:attachment_url", NS).length > 0
            $stderr.puts("skipping attachment")
          else
            # In a WordPress export file the comments sit inside the post
            # item. extract_comments removes them from the node, so it has
            # to run before the post itself is saved.
            comments = extract_comments(node)
            save(node)
            comments.each { |c| c.save }
          end
        else
          copy << node.copy(true)
        end
      end
      # Deep copy of the feed root as built so far, plus this channel's
      # item-free copy.
      ch_copy = root.copy(true)
      ch_copy << copy
      unless is_comments
        channel_doc = LibXML::XML::Document.new()
        channel_doc.root = ch_copy
        channel_doc.save("#{@dest}/channel-#{channel_count}.xml")
      end
      # The counter advances for comment channels too, so file numbers
      # follow channel position.
      channel_count += 1
    else
      root << channel
    end
  end
  @parent.save("#{@dest}/feed.xml")
end