Class: Matryoshka::Document::Html::Merge

Inherits:
Object
  • Object
show all
Defined in:
lib/matryoshka/document/html/merge.rb

Constant Summary collapse

# Default methodologies, in priority order. Each entry is a single-pair hash
# of the form { finder_method => merge_technique }; #run walks the list and
# applies the technique belonging to the first finder that returns a match.
DEFAULT_MERGE = [
  { :empty      => :full },
  { :doc        => :remerge_children },
  { :id         => :replace },
  { :before_id  => :before },
  { :after_id   => :append },
  { :single_tag => :replace },
  { :header_tag => :end_of_tag },
  { :default    => :end_of_tag }
]
# Alternative methodology list, in priority order, using the finders and
# techniques labelled "external-specific" in this class. Same shape as
# DEFAULT_MERGE: single-pair hashes of { finder_method => merge_technique }.
EXTERNAL_MERGE = [
  { :original_id       => :replace_id },
  { :original_selector => :selector },
  { :nochildren        => :inside_body },
  { :doc               => :remerge_original_children },
  # { :id => :replace },
  # { :parent => :insert }, # This translate to before_id or after_id
  { :all               => :remerge_children }
]
# Debug counter shared through a class variable. Within this file it is only
# referenced by the commented-out tracing lines in #run.
@@round =
0

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(orig, add, meth = DEFAULT_MERGE) ⇒ Merge

Returns a new instance of Merge.



32
33
34
35
36
# File 'lib/matryoshka/document/html/merge.rb', line 32

# Sets up a merge of +add+ into +orig+.
#
# orig - the parent document; normalised through convert_to_parse_format
# add  - the document to merge in; normalised the same way
# meth - ordered list of merge methodologies (defaults to DEFAULT_MERGE)
def initialize(orig, add, meth = DEFAULT_MERGE)
  self.methodologies = meth
  self.original      = convert_to_parse_format(orig)
  self.additional    = convert_to_parse_format(add)
end

Instance Attribute Details

#additionalObject

original is the parent document; additional is the document to be merged; corresponding is the matching portion of the original doc; methodologies are the techniques used to merge, in order, each of the form:

{:how_to_find_match => :what_to_do_if_match}, …


30
31
32
# File 'lib/matryoshka/document/html/merge.rb', line 30

# Reader for @additional: the document to be merged into the original.
def additional
  @additional
end

#correspondingObject

original is the parent document; additional is the document to be merged; corresponding is the matching portion of the original doc; methodologies are the techniques used to merge, in order, each of the form:

{:how_to_find_match => :what_to_do_if_match}, …


30
31
32
# File 'lib/matryoshka/document/html/merge.rb', line 30

# Reader for @corresponding: the portion of the original document that the
# most recent finder matched (assigned by #corresponding_match).
def corresponding
  @corresponding
end

#methodologiesObject

original is the parent document; additional is the document to be merged; corresponding is the matching portion of the original doc; methodologies are the techniques used to merge, in order, each of the form:

{:how_to_find_match => :what_to_do_if_match}, …


30
31
32
# File 'lib/matryoshka/document/html/merge.rb', line 30

# Reader for @methodologies: the ordered list of finder/technique pairs that
# #run iterates over.
def methodologies
  @methodologies
end

#originalObject

original is the parent document; additional is the document to be merged; corresponding is the matching portion of the original doc; methodologies are the techniques used to merge, in order, each of the form:

{:how_to_find_match => :what_to_do_if_match}, …


30
31
32
# File 'lib/matryoshka/document/html/merge.rb', line 30

# Reader for @original: the parent document that receives the merge.
def original
  @original
end

Instance Method Details

#additional_idObject

These are quickly tacked on.



131
132
133
# File 'lib/matryoshka/document/html/merge.rb', line 131

# Returns the 'id' attribute of the additional node, or nil when the node
# does not expose #attributes at all.
def additional_id
  return unless additional.respond_to?(:attributes)

  additional.attributes['id']
end

#after_idObject



147
148
149
150
151
152
153
# File 'lib/matryoshka/document/html/merge.rb', line 147

# Finder: when the additional node's id begins with 'after__', looks up the
# element in the original whose id is the remainder of that string.
# Returns the result of that lookup, or nil when the prefix is absent.
def after_id
  prefix = 'after__'
  element_id = additional_id
  return unless element_id && element_id.index(prefix) == 0

  original.at("##{element_id.sub(prefix, '')}")
end

#allObject



183
184
185
# File 'lib/matryoshka/document/html/merge.rb', line 183

# Finder that returns the original document itself as the match.
def all
  original
end

#appendObject



155
156
157
# File 'lib/matryoshka/document/html/merge.rb', line 155

# Technique: passes the additional node's HTML to #after on the
# corresponding node.
def append
  markup = additional.to_html
  corresponding.after(markup)
end

#beforeObject



143
144
145
# File 'lib/matryoshka/document/html/merge.rb', line 143

# Technique: passes the additional node's HTML to #before on the
# corresponding node.
def before
  markup = additional.to_html
  corresponding.before(markup)
end

#before_idObject



135
136
137
138
139
140
141
# File 'lib/matryoshka/document/html/merge.rb', line 135

# Finder: when the additional node's id begins with 'before__', looks up the
# element in the original whose id is the remainder of that string.
# Returns the result of that lookup, or nil when the prefix is absent.
def before_id
  prefix = 'before__'
  element_id = additional_id
  return unless element_id && element_id.index(prefix) == 0

  original.at("##{element_id.sub(prefix, '')}")
end

#convert_to_parse_format(data) ⇒ Object



57
58
59
60
61
62
63
64
65
66
# File 'lib/matryoshka/document/html/merge.rb', line 57

# Normalises +data+ into the parsed form the merge works on.
#
# - responds to #content (Html object): its content is returned; it should
#   already be in Hpricot form
# - responds to #to_html (Hpricot object): returned unchanged
# - anything else (probably a String or IO): parsed with Hpricot()
def convert_to_parse_format(data)
  return data.content if data.respond_to?(:content)
  return data if data.respond_to?(:to_html)

  Hpricot(data)
end

#corresponding_match(find_method) ⇒ Object



53
54
55
# File 'lib/matryoshka/document/html/merge.rb', line 53

# Invokes the finder named +find_method+ and records its result as
# +corresponding+. Returns that result, so a nil/false return means the
# finder found no match.
def corresponding_match(find_method)
  match = send(find_method)
  self.corresponding = match
end

#defaultObject



159
160
161
# File 'lib/matryoshka/document/html/merge.rb', line 159

# Finder of last resort: the original's 'body' element when the lookup finds
# one, otherwise the original itself.
def default
  document = original
  document.at('body') || document
end

#docObject



78
79
80
81
# File 'lib/matryoshka/document/html/merge.rb', line 78

# Finder: matches the whole original when the additional node's class is
# exactly Hpricot::Doc; otherwise nil.
def doc
  # original unless additional.kind_of? Nokogiri::HTML::Element
  return unless additional.class == Hpricot::Doc

  original
end

#emptyObject

Below are matching and replacing methods. Perhaps move them to a separate module later.



70
71
72
# File 'lib/matryoshka/document/html/merge.rb', line 70

# Finder: matches the original when its inner HTML is empty; otherwise nil.
def empty
  return unless original.inner_html.empty?

  original
end

#end_of_tagObject



163
164
165
# File 'lib/matryoshka/document/html/merge.rb', line 163

# Technique: appends the additional node's HTML to the end of the
# corresponding node's inner HTML. Returns the combined markup.
def end_of_tag
  target = corresponding
  combined = target.inner_html + additional.to_html
  target.inner_html = combined
end

#fullObject



74
75
76
# File 'lib/matryoshka/document/html/merge.rb', line 74

# Technique: replaces the corresponding node's inner HTML with the additional
# node's inner HTML. Returns the markup that was assigned.
def full
  replacement = additional.inner_html
  corresponding.inner_html = replacement
end

#header_tagObject



187
188
189
# File 'lib/matryoshka/document/html/merge.rb', line 187

# Finder: for 'link', 'meta' and 'script' nodes, matches the original's
# 'head' element. Returns false for any other tag name.
def header_tag
  return false unless %w[link meta script].include?(additional.name)

  original.at('head')
end

#idObject

End external-specific merging methods



121
122
123
# File 'lib/matryoshka/document/html/merge.rb', line 121

# Finder: looks up the element in the original that carries the same id as
# the additional node. Returns the result of that lookup, or nil when the
# additional node has no id.
#
# Goes through #additional_id (as #before_id and #after_id do) so that a node
# which does not respond to #attributes yields nil instead of raising
# NoMethodError.
def id
  element_id = additional_id
  original.at("##{element_id}") if element_id
end

#inside_bodyObject



177
178
179
180
181
# File 'lib/matryoshka/document/html/merge.rb', line 177

# Technique: sets the corresponding node's inner HTML to the HTML of the
# additional document's 'body' element, or of the additional node itself when
# the lookup finds no body. Returns the markup that was assigned.
def inside_body
  source = additional.at('body') || additional
  corresponding.inner_html = source.to_html
end

#mergeable?Boolean

Returns:

  • (Boolean)


196
197
198
199
# File 'lib/matryoshka/document/html/merge.rb', line 196

# True only when the additional node's class is exactly Hpricot::Elem or
# Hpricot::Doc; #run refuses to merge anything else.
def mergeable?
  node_class = additional.class
  [Hpricot::Elem, Hpricot::Doc].any? { |accepted| accepted == node_class }
end

#nochildrenObject



167
168
169
170
171
172
173
174
175
# File 'lib/matryoshka/document/html/merge.rb', line 167

# Finder: returns false as soon as the original has a direct child whose
# class is exactly Hpricot::Elem. Otherwise (children nil, empty, or all
# non-element) returns the result of original.at('*').
def nochildren
  kids = original.children
  return false if kids && kids.any? { |kid| kid.class == Hpricot::Elem }

  original.at('*')
end

#original_idObject

Some external-specific merging methods



101
102
103
# File 'lib/matryoshka/document/html/merge.rb', line 101

# Finder (external merge): matches the original node when it has an id and
# the additional document contains an element with that same id.
# Returns the original, or nil when either condition fails.
def original_id
  element_id = original.attributes['id']
  return unless element_id

  original if additional.at("##{element_id}")
end

#original_selectorObject



109
110
111
112
113
# File 'lib/matryoshka/document/html/merge.rb', line 109

# Finder (external merge): matches the original node when its 'rel'
# attribute is 'selector' and the additional document contains something
# matching the selector held in its 'href' attribute. Otherwise nil.
def original_selector
  attrs = original.attributes
  return unless attrs['rel'] == 'selector'

  original if additional.at(original.attributes['href'])
end

#remerge_childrenObject



83
84
85
86
87
88
89
90
# File 'lib/matryoshka/document/html/merge.rb', line 83

# Technique: merges each element child of the additional node into the
# corresponding node, one at a time, by running a fresh merge (same class,
# same methodologies) per child. Non-element children are skipped.
def remerge_children
  # children may be nil for an empty node (#nochildren guards against the
  # same case), so fall back to an empty list rather than raising
  # NoMethodError on nil.
  (additional.children || []).each do |elem|
    # if elem.kind_of? Nokogiri::XML::Element
    next unless elem.kind_of? Hpricot::Elem

    self.class.new(corresponding, elem, methodologies).run
  end
end

#remerge_original_childrenObject



92
93
94
95
96
97
98
# File 'lib/matryoshka/document/html/merge.rb', line 92

# Technique (external merge): runs a fresh merge (same class, same
# methodologies) of the additional document against each element child of
# the corresponding node. Non-element children are skipped.
def remerge_original_children
  # children may be nil for an empty node (#nochildren guards against the
  # same case), so fall back to an empty list rather than raising
  # NoMethodError on nil.
  (corresponding.children || []).each do |elem|
    next unless elem.kind_of? Hpricot::Elem

    self.class.new(elem, additional, methodologies).run
  end
end

#replaceObject



125
126
127
# File 'lib/matryoshka/document/html/merge.rb', line 125

# Technique: swaps the corresponding node for the additional node's HTML.
def replace
  markup = additional.to_html
  corresponding.swap(markup)
end

#replace_idObject



105
106
107
# File 'lib/matryoshka/document/html/merge.rb', line 105

# Technique (external merge): swaps the corresponding node for the HTML of
# the element in the additional document that has the same id.
def replace_id
  target = corresponding
  counterpart = additional.at("##{target.attributes['id']}")
  target.swap(counterpart.to_html)
end

#runObject



38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/matryoshka/document/html/merge.rb', line 38

# Performs the merge. Walks the methodologies in order; for the first
# finder that yields a match on a mergeable additional node, applies the
# paired technique and stops. Always returns the original document.
def run
  # puts "Round #{@@round += 1}"
  methodologies.each do |strategy|
    strategy.each_pair do |find_method, merge_technique|
      # puts "#{@@round} - Doing #{find_method}:#{merge_technique} on #{additional.to_html[0..50].gsub(/\n/,'')}"
      next unless mergeable? && corresponding_match(find_method)

      send(merge_technique)
      return original
    end
  end
  # Fall-through return; this wasn't in previous versions ... there may be
  # some reason to avoid it.
  original
end

#selectorObject



115
116
117
# File 'lib/matryoshka/document/html/merge.rb', line 115

# Technique (external merge): swaps the corresponding node for the HTML of
# whatever the additional document yields for the selector stored in the
# corresponding node's 'href' attribute.
def selector
  target = corresponding
  counterpart = additional.at(target.attributes['href'])
  target.swap(counterpart.to_html)
end

#single_tagObject

For when there can be only one of a tag



192
193
194
# File 'lib/matryoshka/document/html/merge.rb', line 192

# Finder: for tags that may appear only once ('title', 'head', 'body'),
# matches the element of the same name in the original. Returns false for
# any other tag name.
def single_tag
  tag_name = additional.name
  return false unless %w[title head body].include?(tag_name)

  original.at(tag_name)
end