Class: HTMLTag

Inherits:
HTMLToken show all
Defined in:
lib/yadis/htmltokenizer.rb

Overview

Class representing an HTML tag

Instance Attribute Summary collapse

Attributes inherited from HTMLToken

#raw

Instance Method Summary collapse

Methods inherited from HTMLToken

#==, #to_s, #trimmed_text

Constructor Details

#initialize(text) ⇒ HTMLTag

Returns a new instance of HTMLTag.



222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# File 'lib/yadis/htmltokenizer.rb', line 222

# Builds a tag token from the raw source of a single tag.
#
# text - the raw tag markup; it must start with '<' and end with '>'.
#
# After construction @tag_name holds the downcased first name-like run
# ([\w:-]+) found in text, prefixed with '/' when the character right after
# '<' is '/', and @end_tag records whether that was the case. The attribute
# hash starts out empty and @hashed starts out false, so no attribute
# parsing happens here.
#
# Raises RuntimeError when text is not delimited by '<' and '>', or when no
# tag name can be found in it.
def initialize(text)
  super(text)

  # text[i, 1] returns a one-character String (or nil) regardless of whether
  # single-index access yields an Integer or a String on the running Ruby.
  unless text[0, 1] == '<' && text[-1, 1] == '>'
    raise "Text passed to HTMLTag.initialize is not a tag"
  end

  @attr_hash = {}
  @raw = text

  # Only the first match is needed, so avoid scanning the whole string.
  name = text[/[\w:-]+/]
  if name.nil?
    raise "Error, no tag name found in: #{text.inspect}"
  end

  # A '/' directly after '<' marks an end tag; its name keeps the slash.
  @end_tag = (text[1, 1] == '/')
  @tag_name = @end_tag ? '/' + name.downcase : name.downcase

  @hashed = false
end

Instance Attribute Details

#end_tag ⇒ Object (readonly)

Returns the value of attribute end_tag.



221
222
223
# File 'lib/yadis/htmltokenizer.rb', line 221

# Reader for @end_tag, which initialize sets to true when the character
# right after the opening '<' is '/', and to false otherwise.
def end_tag
  @end_tag
end

#tag_name ⇒ Object (readonly)

Returns the value of attribute tag_name.



221
222
223
# File 'lib/yadis/htmltokenizer.rb', line 221

# Reader for @tag_name: the downcased tag name assigned by initialize,
# prefixed with '/' when the tag is an end tag (for example '/a').
def tag_name
  @tag_name
end

Instance Method Details

#attr_hash ⇒ Object

Returns a hash of all the tag’s attributes, keyed by downcased attribute name. The hash is built lazily on first access, so tags whose attributes are never inspected incur no parsing cost.



251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# File 'lib/yadis/htmltokenizer.rb', line 251

# Returns the tag's attributes as a Hash keyed by downcased attribute name.
#
# The hash is filled in on the first call only; later calls return the same
# Hash object without re-reading @raw. End tags are never parsed, so their
# hash stays as it was set up.
#
# Values are stored as follows:
# * an attribute written without a value maps to its own name as written
# * a quoted value is stored without its surrounding quotes
# * an unquoted value is stored as is
def attr_hash
  return @attr_hash if @hashed

  unless @end_tag
    # Everything between the tag name and the final '>' is attribute source.
    attr_source = @raw[/<[\w:-]+\s+(.*)>/m, 1]
    if attr_source
      pairs = attr_source.scan(/\s*([\w:-]+)(?:\s*=\s*("[^"]*"|'[^']*'|([^"'>][^\s>]*)))?/m)
      pairs.each do |name, raw_value|
        @attr_hash[name.downcase] =
          if raw_value.nil?
            name
          elsif ['"'[0], '\''[0]].include?(raw_value[0])
            # Drop the opening and closing quote characters.
            raw_value[1..-2]
          else
            raw_value
          end
      end
    end
  end

  @hashed = true
  @attr_hash
end

#text ⇒ Object

Returns the ‘alt’ text of an img or applet start tag if it has one, or an empty string otherwise



286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
# File 'lib/yadis/htmltokenizer.rb', line 286

# Returns the value of the 'alt' attribute when this is an 'img' or 'applet'
# start tag that has one. In every other case (end tags, other tag names,
# or no 'alt' attribute) the result is an empty string.
def text
  return '' if end_tag
  return '' unless %w[img applet].include?(tag_name)

  alt = attr_hash['alt']
  alt.nil? ? '' : alt
end