Class: Raakt::Test

Inherits:
Object
  • Object
show all
Defined in:
lib/raakt.rb,
lib/iso_language_codes.rb

Constant Summary collapse

# Two-letter ISO 639-1 language codes, in alphabetical order.
# check_valid_language_code tests the first two characters of the html
# element's lang attribute against this list.
#
# The withdrawn codes "mo" and "sh" are kept so documents that still use them
# keep validating; "cu" and "lu" are valid ISO 639-1 codes that were missing.
# The array is frozen so the shared constant cannot be mutated at runtime.
ISO_CODES = %w[
  aa ab ae af ak am an ar as av ay az
  ba be bg bh bi bm bn bo br bs
  ca ce ch co cr cs cu cv cy
  da de dv dz
  ee el en eo es et eu
  fa ff fi fj fo fr fy
  ga gd gl gn gu gv
  ha he hi ho hr ht hu hy hz
  ia id ie ig ii ik io is it iu
  ja jv
  ka kg ki kj kk kl km kn ko kr ks ku kv kw ky
  la lb lg li ln lo lt lu lv
  mg mh mi mk ml mn mo mr ms mt my
  na nb nd ne ng nl nn no nr nv ny
  oc oj om or os
  pa pi pl ps pt
  qu
  rm rn ro ru rw
  sa sc sd se sg sh si sk sl sm sn so sq sr ss st su sv sw
  ta te tg th ti tk tl tn to tr ts tt tw ty
  ug uk ur uz
  ve vi vo
  wa wo
  xh
  yi yo
  za zh zu
].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(html = nil, headers = nil, wordlist = nil) ⇒ Test

Returns a new instance of Test.



85
86
87
88
89
90
91
92
93
# File 'lib/raakt.rb', line 85

# Create a new test instance.
#
# html, headers and wordlist are all optional. Each value is stored in its
# instance variable and, when truthy, also passed through the matching
# writer (doc=, headers=, wordlist=). Detection of b/i elements is on by
# default (@ignore_bi starts out false).
def initialize(html=nil, headers=nil, wordlist=nil)
  @html, @headers, @wordlist = html, headers, wordlist

  if html
    self.doc = @html
  end
  if headers
    self.headers = @headers
  end
  if wordlist
    self.wordlist = @wordlist
  end

  @ignore_bi = false
end

Instance Attribute Details

#headersObject

Returns the value of attribute headers.



83
84
85
# File 'lib/raakt.rb', line 83

# Reader: returns the current value of @headers.
def headers; @headers; end

#htmlObject

Returns the value of attribute html.



83
84
85
# File 'lib/raakt.rb', line 83

# Reader: returns the current value of @html.
def html; @html; end

#ignore_biObject

Returns the value of attribute ignore_bi.



83
84
85
# File 'lib/raakt.rb', line 83

# Reader: returns the current value of @ignore_bi.
def ignore_bi; @ignore_bi; end

#wordlistObject

Returns the value of attribute wordlist.



83
84
85
# File 'lib/raakt.rb', line 83

# Reader: returns the current value of @wordlist.
def wordlist; @wordlist; end

Instance Method Details

#allObject

Call all check methods.



112
113
114
115
116
117
118
119
120
121
122
# File 'lib/raakt.rb', line 112

# Run every check and collect the results.
#
# Each method on this object whose name starts with "check_" is invoked
# without arguments, and the arrays they return are concatenated in the
# order the methods are reported by #methods.
def all
  check_names = methods.select { |name| name[0..5] == "check_" }
  check_names.inject([]) { |messages, name| messages + send(name) }
end

#alt_to_text(element) ⇒ Object



524
525
526
527
528
529
530
# File 'lib/raakt.rb', line 524

# Return the alt attribute of an Hpricot element.
#
# Anything that is not an Hpricot::Elem, and any element that has no alt
# attribute, yields the empty string.
def alt_to_text(element)
  return "" unless element.kind_of?(Hpricot::Elem)
  return "" unless element.has_attribute?("alt")

  element['alt']
end

#check_areasObject

Verify that all area elements have a non-empty alt attribute. See UWEM 1.0 Test 1.1_HTML_01 (together with check_images)



204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# File 'lib/raakt.rb', line 204

# Check every area element for alternative text.
#
# Reports :missing_area_alt when the alt attribute is absent and
# :missing_area_alt_text when it is present but empty. The element is
# identified by its name, then its id, then the literal "unknown".
def check_areas
  messages = []

  (@doc/"area").each do |area|
    identifier = area['name'] || area['id'] || "unknown"
    alt = area['alt']

    if !alt
      messages << ErrorMessage.new(:missing_area_alt, identifier)
    elsif alt.length == 0
      messages << ErrorMessage.new(:missing_area_alt_text, identifier)
    end
  end

  messages
end

#check_character_setObject

Verify that the character set specified in the HTTP headers matches the one specified in the HTML meta element.



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# File 'lib/raakt.rb', line 148

# Compare the charset declared in the HTTP content-type header with the one
# declared in a meta http-equiv="content-type" element.
#
# Nothing is checked when no headers are available. A :charset_mismatch
# message is produced only when both charsets could be determined and they
# differ. If several matching meta elements exist, the last one wins.
def check_character_set
  return [] unless @headers && @headers.length > 0

  header_charset = ""
  if @headers.has_key?("content-type")
    header_charset = parse_charset(@headers["content-type"].to_s)
  end

  # Charset from the meta element.
  meta_charset = ""
  @doc.search("//meta[@http-equiv]").each do |meta|
    next unless meta["http-equiv"].downcase == "content-type"
    meta_charset = parse_charset(meta["content"])
  end

  both_known = header_charset.length > 0 && meta_charset.length > 0
  return [ErrorMessage.new(:charset_mismatch)] if both_known && meta_charset != header_charset

  []
end

#check_difficult_wordsObject



426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
# File 'lib/raakt.rb', line 426

# Look for words or phrases from @wordlist in the document.
#
# The searched text is the document's inner text plus every alt and title
# attribute value; blockquote and q elements are excluded. @wordlist is
# iterated as key/value pairs: each key is a word or phrase, and its value
# is reported in a :difficult_word message when the key occurs.
#
# Keys are matched case-insensitively, as whole words, and literally (regex
# metacharacters in a key are escaped). Any run of whitespace in the
# document matches the gap between the words of a multi-word key.
#
# Returns an array of messages; empty when no wordlist is set.
def check_difficult_words
  messages = []
  return messages unless @wordlist

  # NOTE(review): these removals appear to strip the quoted elements from
  # @doc itself, so checks that run afterwards would no longer see them --
  # TODO confirm against Hpricot and decide whether to work on a copy.
  @doc.search("blockquote").remove
  @doc.search("q").remove

  fragments = [@doc.inner_text]

  # Add alt texts.
  @doc.search("*[@alt]").each { |item| fragments << " #{item['alt']}, " }

  # Add title texts.
  @doc.search("*[@title]").each { |item| fragments << " #{item['title']}, " }

  doctext = fragments.join

  @wordlist.each do |key, value|
    # Escape every word of the key and allow any whitespace between them.
    pattern = key.to_s.split(' ').map { |word| Regexp.escape(word) }.join("\\s+")
    next if pattern.empty?

    re = Regexp.new("\\b" + pattern + "\\b", Regexp::IGNORECASE)
    messages << ErrorMessage.new(:difficult_word, value) if doctext =~ re
  end

  messages
end

#check_document_structureObject

Verify that heading elements (h1-h6) appear in the correct order (no levels skipped). See UWEM 1.0 Test 3.5_HTML_03.



239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# File 'lib/raakt.rb', line 239

# Verify that headings (h1-h6) appear in a sensible order.
#
# Reports :first_h_not_h1 when the first heading is not an h1, and
# :wrong_h_structure for the first place where a heading is more than one
# level deeper than the heading before it (checking stops there).
#
# The heading list is fetched once up front; re-running #headings for every
# comparison would walk the whole document again each time.
def check_document_structure
  messages = []
  all_headings = headings
  return messages if all_headings.empty?

  first = all_headings.first
  if level(first.name) != 1
    messages << ErrorMessage.new(:first_h_not_h1, "h" + first.name[1,1])
  end

  all_headings.each_cons(2) do |previous, current|
    if level(current.name) - level(previous.name) > 1
      messages << ErrorMessage.new(:wrong_h_structure)
      break
    end
  end

  messages
end

#check_embedObject

Verify that the embed element isn’t used. See UWEM 1.0 Test 1.1_HTML_06.



141
142
143
144
# File 'lib/raakt.rb', line 141

# Report :embed_used when the document contains at least one embed element;
# otherwise return an empty array.
def check_embed
  embeds = (@doc/'embed')
  embeds.empty? ? [] : [ErrorMessage.new(:embed_used)]
end

#check_fieldset_legendObject

Verify that all fieldset elements have a legend child element. See UWEM 1.0 Test 12.3_HTML_01.



126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/raakt.rb', line 126

# Check that every fieldset contains a legend element.
#
# Each fieldset without one yields a :fieldset_missing_legend message
# carrying the fieldset's 1-based position in the document as a string.
def check_fieldset_legend
  messages = []

  (@doc/"fieldset").each_with_index do |fieldset, index|
    next unless (fieldset/"legend").empty?
    messages << ErrorMessage.new(:fieldset_missing_legend, (index + 1).to_s)
  end

  messages
end

#check_for_formatting_elementsObject

Verify that no formatting elements have been used. See UWEM 1.0 Test 7.2_HTML_01 and Test 7.3_HTML_01.



302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
# File 'lib/raakt.rb', line 302

# Look for presentational and flickering markup.
#
# * :missing_semantics lists, once each and in document order, the names of
#   any font/b/i/u/tt/small/big/strike/s elements found. When @ignore_bi is
#   set, b and i are not searched for.
# * :has_flicker is reported when a blink or marquee element is present.
def check_for_formatting_elements
  messages = []

  presentational = if @ignore_bi
    %w(font u tt small big strike s)
  else
    %w(font b i u tt small big strike s)
  end

  used_names = (@doc/presentational.join('|')).map { |element| element.name }
  unless used_names.empty?
    messages << ErrorMessage.new(:missing_semantics, "#{used_names.uniq.join(', ')}")
  end

  flicker_selector = %w(blink marquee).uniq.join('|')
  flicker_items = (@doc/flicker_selector)
  messages << ErrorMessage.new(:has_flicker) unless flicker_items.empty?

  messages
end

#check_for_language_infoObject

Verify that the root document html element has a lang attribute.



331
332
333
334
335
336
337
338
339
340
341
342
# File 'lib/raakt.rb', line 331

# Check that the html element declares a language.
#
# Reports :missing_lang_info when there is no html element with a lang
# attribute, or when the first html element's lang value is shorter than
# two characters.
def check_for_language_info
  has_lang_attribute = !(@doc/'html[@lang]').empty?

  if has_lang_attribute && (@doc/"html").first["lang"].to_s.length >= 2
    []
  else
    [ErrorMessage.new(:missing_lang_info)]
  end
end

#check_for_nested_tablesObject

Verify that the document does not have any nested tables. This is indicative of a table-based layout.



264
265
266
267
268
269
270
271
272
273
274
275
276
# File 'lib/raakt.rb', line 264

# Report :has_nested_tables (once) when any table in the document contains
# another table; return an empty array otherwise.
def check_for_nested_tables
  nested = (@doc/"table").any? { |table| !(table/"table").empty? }
  nested ? [ErrorMessage.new(:has_nested_tables)] : []
end

#check_formObject

Verify that all form fields have a corresponding label element. See UWEM 1.0 Test 12.4_HTML_02.



377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
# File 'lib/raakt.rb', line 377

# Check that every editable form field is referenced by a label.
#
# A field counts as labelled when some label's "for" attribute equals the
# field's id (a missing id is treated as ""). Every other field yields a
# :field_missing_label message identified by the field's id, then its name,
# then the literal "unknown".
def check_form
  labels = get_labels
  fields = get_editable_fields

  # Ids that at least one label points at.
  labelled_ids = labels.map { |label| label["for"] }.select { |target| target }

  unlabelled = fields.reject { |field| labelled_ids.include?(field["id"] || "") }
  unlabelled.map do |field|
    ErrorMessage.new(:field_missing_label, field["id"] || field["name"] || "unknown")
  end
end

#check_framesObject

Verify that all frame elements have a title attribute.



405
406
407
408
409
410
411
412
413
# File 'lib/raakt.rb', line 405

# Check that every frame has a non-empty title (UWEM Test 12.1_HTML_01).
#
# Only runs for frameset documents. Each frame whose title is missing, or
# empty after normalize_text, yields a :missing_frame_title message
# carrying the frame's src.
def check_frames
  return [] unless is_frameset

  untitled = (@doc/"frame").select do |frame|
    normalize_text(frame['title'] || '').empty?
  end

  untitled.map { |frame| ErrorMessage.new(:missing_frame_title, frame['src']) }
end

#check_has_headingObject

Verify that the document has at least one h1 element.



232
233
234
235
# File 'lib/raakt.rb', line 232

# Report :missing_heading when the document has no h1 element; return an
# empty array otherwise.
def check_has_heading
  (@doc/"h1").empty? ? [ErrorMessage.new(:missing_heading)] : []
end

#check_imagesObject

Verify that all img elements have an alt attribute.



197
198
199
200
# File 'lib/raakt.rb', line 197

# Produce one :missing_alt message, carrying the image's src, for every img
# element that has no alt attribute.
def check_images
  (@doc/"img:not([@alt])").map do |image|
    ErrorMessage.new(:missing_alt, image['src'])
  end
end

#check_input_type_imgObject

Verify that all input type=image elements have an alt attribute.



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# File 'lib/raakt.rb', line 177

# Check image inputs for alternative text (UWEM 1.0 Test 1.1_HTML_01).
#
# Considers every input whose type matches "image" (case-insensitively).
# Reports :missing_input_alt when alt is absent and :missing_input_alt_text
# when it is empty, identifying the input by name, then id, then "".
def check_input_type_img
  messages = []

  @doc.search("input").each do |input|
    next unless input['type'] =~ /image/i

    identifier = input['name'] || input['id'] || ""
    alt = input['alt']

    if !alt
      messages << ErrorMessage.new(:missing_input_alt, identifier)
    elsif alt.length == 0
      messages << ErrorMessage.new(:missing_input_alt_text, identifier)
    end
  end

  messages
end

Verify that no link texts are ambiguous. A typical example is the presence of multiple “Read more” links.



364
365
366
367
368
369
370
371
372
373
# File 'lib/raakt.rb', line 364

# Look for ambiguous link texts (e.g. several "Read more" links).
#
# Finds the first link that is_ambiguous_link reports as ambiguous with
# respect to some link in the document and returns a single
# :ambiguous_link_text message carrying that link's text; returns an empty
# array when there is none.
def check_link_text
  links = get_links

  offender = links.find do |candidate|
    links.any? { |other| is_ambiguous_link(candidate, other) }
  end

  offender ? [ErrorMessage.new(:ambiguous_link_text, get_link_text(offender))] : []
end

#check_refreshObject

Verify that the document does not use meta-refresh to redirect the user away after a period of time.



417
418
419
420
421
422
423
# File 'lib/raakt.rb', line 417

# Report one :has_meta_refresh message for every meta element whose
# http-equiv attribute is "refresh".
#
# The comparison ignores case, so "Refresh" and "REFRESH" are detected too;
# this matches how check_character_set treats http-equiv. Meta elements
# without an http-equiv attribute are skipped.
def check_refresh
  refresh_elements = (@doc/'meta').select do |element|
    element["http-equiv"].to_s.downcase == "refresh"
  end

  refresh_elements.map { ErrorMessage.new(:has_meta_refresh) }
end

#check_tablesObject

Verify that all tables have at least one table header (th) element.



280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
# File 'lib/raakt.rb', line 280

# Check that every table has at least one header cell.
#
# A th counts when it sits in a row directly under the table, or in a row
# of a thead or tbody directly under the table. Each table without one
# yields a :missing_th message carrying the table's 1-based position in the
# document as a string.
def check_tables
  header_paths = [">tr>th", ">thead>tr>th", ">tbody>tr>th"]
  messages = []

  (@doc/"table").each_with_index do |table, index|
    has_header = header_paths.any? { |path| !(table/path).empty? }
    messages << ErrorMessage.new(:missing_th, (index + 1).to_s) unless has_header
  end

  messages
end

#check_titleObject

Verify that the document has a non-empty title element.



223
224
225
226
227
228
# File 'lib/raakt.rb', line 223

# Check the document title.
#
# Reports :missing_title when there is no title element and :empty_title
# when the title's inner HTML is empty after normalize_text; returns an
# empty array otherwise.
def check_title
  title = @doc.at('title')

  if !title
    [ErrorMessage.new(:missing_title)]
  elsif normalize_text(title.inner_html).empty?
    [ErrorMessage.new(:empty_title)]
  else
    []
  end
end

#check_valid_language_codeObject

Verify that the html element has a valid lang code.



346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
# File 'lib/raakt.rb', line 346

# Check that the html element's lang attribute starts with a known code.
#
# Nothing is reported when no html element has a lang attribute. Otherwise
# the first html element's lang value is lower-cased and its first two
# characters are looked up in ISO_CODES; an unknown prefix yields a
# :wrong_lang_code message carrying the full lower-cased value.
def check_valid_language_code
  return [] if (@doc/"html[@lang]").empty?

  declared = (@doc/"html").first["lang"].to_s.downcase
  return [] if ISO_CODES.include?(declared[0..1])

  [ErrorMessage.new(:wrong_lang_code, declared)]
end

#doc=(html) ⇒ Object

Set the HTML used in the test.



96
97
98
99
# File 'lib/raakt.rb', line 96

# Set the HTML under test: parse it with Hpricot and keep the result in @doc.
def doc=(html)
  # 512 KiB buffer, so very large attribute values (such as ASP.NET
  # viewstate fields) can still be parsed.
  Hpricot.buffer_size = 512 * 1024
  @doc = Hpricot(html)
end

#downcase_hash_keys(a_hash) ⇒ Object



481
482
483
484
485
# File 'lib/raakt.rb', line 481

# Return a new hash with every key of a_hash lower-cased (via #downcase) and
# the values unchanged. The argument itself is not modified; when two keys
# differ only in case, the one iterated last wins.
def downcase_hash_keys(a_hash)
  a_hash.inject({}) do |downcased, (key, value)|
    downcased[key.downcase] = value
    downcased
  end
end

#elements_to_text(element) ⇒ Object



532
533
534
535
536
537
538
539
# File 'lib/raakt.rb', line 532

# Flatten an element into plain text.
#
# Walks every node yielded by traverse_all_element: text nodes contribute
# their string form, every other node contributes whatever alt_to_text
# returns for it. The pieces are concatenated in traversal order.
def elements_to_text(element)
  text = ''

  element.traverse_all_element do |node|
    fragment = if node.kind_of?(Hpricot::Text)
      "#{node}"
    else
      alt_to_text(node)
    end
    text = text + fragment
  end

  text
end

#get_editable_fieldsObject



564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
# File 'lib/raakt.rb', line 564

# Collect the form fields a user can edit.
#
# Returns an array of all textarea, select and input elements except inputs
# of type button, submit, hidden or image. The type is compared
# case-insensitively (type="HIDDEN" is excluded just like type="hidden"),
# in line with the case-insensitive match used by check_input_type_img.
# Elements without a type attribute are always included.
def get_editable_fields
  non_editable_types = %w(button submit hidden image)
  fields = []

  (@doc/"textarea|select|input").each do |field|
    field_type = (field["type"] || "").to_s.downcase
    fields << field unless non_editable_types.include?(field_type)
  end

  fields
end

#get_labelsObject



559
560
561
# File 'lib/raakt.rb', line 559

# Return every label element in the document.
def get_labels
  labels = (@doc/'label')
  labels
end


599
600
601
602
# File 'lib/raakt.rb', line 599

# Return the link's text content (as produced by elements_to_text),
# stripped and passed through normalize_text. A nil text is treated as "".
def get_link_text(link)
  raw_text = elements_to_text(link) || ''
  normalize_text(raw_text.strip)
end


608
609
610
611
# File 'lib/raakt.rb', line 608

# Return the link's title attribute, stripped and passed through
# normalize_text. A missing title is treated as "".
def get_link_title(link)
  raw_title = link['title'] || ''
  normalize_text(raw_title.strip)
end


604
605
606
# File 'lib/raakt.rb', line 604

# Return the link's href attribute (nil when the link has none).
def get_link_url(link)
  href = link['href']
  href
end


505
506
507
# File 'lib/raakt.rb', line 505

# Return every a element in the document.
def get_links
  anchors = (@doc/'a')
  anchors
end

#headingsObject

Utility methods



467
468
469
470
471
472
473
# File 'lib/raakt.rb', line 467

# Return an array of all heading elements (h1 through h6), in the order
# traverse_element yields them.
def headings
  found = []

  @doc.traverse_element(*%w(h1 h2 h3 h4 h5 h6)) do |heading|
    found << heading
  end

  found
end


497
498
499
500
501
502
503
# File 'lib/raakt.rb', line 497

# Decide whether two links are ambiguous.
#
# Links that point to the same resource are never ambiguous. Otherwise the
# pair is ambiguous exactly when both the link text and the link title are
# identical. Always returns true or false.
def is_ambiguous_link(link_a, link_b)
  if links_point_to_same_resource?(link_a, link_b)
    false
  elsif link_text_identical?(link_a, link_b) && link_title_identical?(link_a, link_b)
    true
  else
    false
  end
end

#is_framesetObject



581
582
583
# File 'lib/raakt.rb', line 581

# True when the document contains at least one frameset element.
def is_frameset
  !(@doc/"frameset").empty?
end

#langinfo(element) ⇒ Object



509
510
511
512
513
514
515
516
517
518
519
520
521
# File 'lib/raakt.rb', line 509

# Return the lang attribute of an element.
#
# Only objects whose class name is exactly 'Tag' are inspected; for those
# the lang value is returned, or "" when it is not set. Anything else
# yields nil.
# NOTE(review): the rest of this file works with Hpricot elements, whose
# class is presumably not named 'Tag' -- confirm whether this is still used.
def langinfo(element)
  return nil unless element.class.to_s == 'Tag'

  element['lang'] || ""
end

#level(heading) ⇒ Object



476
477
478
# File 'lib/raakt.rb', line 476

# Return the numeric level of a heading name, e.g. "h3" gives 3. The level
# is read from the second character of the name.
def level(heading)
  digit = heading[1].chr
  Integer(digit)
end

Returns:

  • (Boolean)


586
587
588
# File 'lib/raakt.rb', line 586

# True when both links have the same text, as computed by get_link_text.
def link_text_identical?(link_a, link_b)
  text_a = get_link_text(link_a)
  text_b = get_link_text(link_b)
  text_a == text_b
end

Returns:

  • (Boolean)


590
591
592
# File 'lib/raakt.rb', line 590

# True when both links have the same title, as computed by get_link_title.
def link_title_identical?(link_a, link_b)
  title_a = get_link_title(link_a)
  title_b = get_link_title(link_b)
  title_a == title_b
end

Returns:

  • (Boolean)


594
595
596
597
# File 'lib/raakt.rb', line 594

# True when the two links are equal to each other or have the same URL
# (as returned by get_link_url).
def links_point_to_same_resource?(link_a, link_b)
  same_link = (link_a == link_b)
  same_link || (get_link_url(link_a) == get_link_url(link_b))
end

#normalize_text(text) ⇒ Object



542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
# File 'lib/raakt.rb', line 542

# Normalize text for comparisons and emptiness checks.
#
# * nil is treated as the empty string.
# * The entities &nbsp; and &#160; become ordinary spaces.
# * Newlines, carriage returns and tabs become spaces. They separate words
#   just like a space does, so deleting them would glue neighbouring words
#   together ("Read\nmore" would turn into "Readmore").
# * Runs of spaces are collapsed to one, and leading/trailing whitespace is
#   removed.
#
# Returns a new string; the argument is not modified.
def normalize_text(text)
  text ||= ''

  retval = text.gsub(/&nbsp;|&#160;/, ' ')
  retval = retval.gsub(/[\n\r\t]/, ' ')

  retval.squeeze(' ').strip
end

#parse_charset(contenttype) ⇒ Object



487
488
489
490
491
492
493
494
# File 'lib/raakt.rb', line 487

# Extract the charset from a content-type string.
#
# Returns the lower-cased charset value, or "" when the string (or nil)
# contains no charset parameter. The parameter name is matched
# case-insensitively, and the value stops at the first whitespace,
# semicolon, comma or quote, so surrounding quotes and any following
# parameters are not included in the result.
def parse_charset(contenttype)
  match = /charset\s*=\s*["']?([^\s;"',]+)/i.match(contenttype.to_s)
  match ? match[1].downcase : ""
end