Class: W3MExtractLinks

Inherits:
AbstractIndex
Includes:
StripTags
Defined in:
lib/langhelp/langhelp-base.rb

Overview

Extract links from an HTML file, and create lh-w3m hyperlinks.

Defined Under Namespace

Classes: Link

Constant Summary

Constants included from StripTags

StripTags::ALT, StripTags::NQ, StripTags::Q

Constants inherited from AbstractIndex

AbstractIndex::SPACES

Constants included from LocalVariables

LocalVariables::ANCHOR_BEGIN, LocalVariables::ANCHOR_END

Constants included from KanjiConverter

KanjiConverter::KCONVERTERS

Instance Attribute Summary

Attributes inherited from AbstractIndex

#arg1, #conf, #title

Instance Method Summary

Methods included from StripTags

#html2txt!

Methods inherited from AbstractIndex

#initialize, #output_title

Methods included from LocalVariables

#insert_local_variables

Methods included from FilenameString

#abbreviate_filename, #abbreviate_filename!, #normalize_filename!

Methods included from KanjiConverter

#encoding, #kconv

Methods included from MkArray

#mkarray

Methods included from EmacsLispString

#lisp_dump_string

Constructor Details

This class inherits a constructor from AbstractIndex

Instance Method Details

#init(x = {}) ⇒ Object



230
231
232
233
234
235
236
# File 'lib/langhelp/langhelp-base.rb', line 230

# Sets up the extractor's state from the index argument and an options hash.
#
# The HTML file name is taken from +arg1+ and passed through
# +normalize_filename!+ (presumably normalizing it in place -- the helper is
# defined elsewhere). The base URI used to resolve links is a +file://+ URI
# built from +x[:base]+ when given, otherwise from the HTML file name itself.
#
# @param x [Hash] options:
#   :base          -- path used instead of the HTML file as the base location
#   :exclude_label -- pattern(s) for link labels to drop (wrapped by +mkarray+)
#   :exclude_url   -- pattern(s) for link hrefs to drop (wrapped by +mkarray+)
# @return [Object] the value stored in @exclude_url
def init(x = {})
  @html = arg1
  normalize_filename!(@html)

  base_location = x[:base] || @html
  @base = URI.parse("file://#{base_location}")

  @exclude_label = mkarray(x[:exclude_label])
  @exclude_url = mkarray(x[:exclude_url])
end


242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/langhelp/langhelp-base.rb', line 242

# Reads the HTML file named by @html and returns the hyperlinks found in it.
#
# Every <a ... href=...>label</a> element matched by the scan yields an
# (href, label) pair. The label is passed through +html2txt!+, and pairs whose
# label matches any entry of @exclude_label, or whose raw href matches any
# entry of @exclude_url (both compared with ===), are dropped. The remaining
# hrefs are resolved against @base; links whose resolved scheme is "file" are
# reported by their abbreviated path, all others by their full URL.
#
# The exclusion filters run before the href is resolved, so an href the
# caller has excluded is never handed to the URI parser. This keeps excluded
# but malformed hrefs (e.g. ones containing spaces) from raising
# URI::InvalidURIError and aborting the whole extraction.
#
# @return [Array<Link>] one Link per retained anchor, in document order
# @raise [URI::InvalidURIError] if a non-excluded href cannot be parsed
def links
  src = kconv { File.read @html }
  anchors = src.scan(%r!<a .*?href=#{Q}(#{NQ})#{Q}.*?>(.+?)</a>!imo)

  anchors.map { |href, label|
    html2txt! label
    next if @exclude_label.any? { |exclude| exclude === label }
    next if @exclude_url.any? { |exclude| exclude === href }

    uri = @base.merge href
    # Capture the full URL before the path is abbreviated; the bang helper is
    # assumed to modify its argument (the URI's own path string) in place.
    url = uri.to_s
    path = uri.path
    # NOTE(review): uri.path can be nil for opaque URIs such as mailto: --
    # confirm abbreviate_filename! tolerates nil.
    abbreviate_filename! path
    url = path if uri.scheme == 'file'
    Link.new(url, label)
  }.compact
end

#to_e(out) ⇒ Object



257
258
259
260
261
262
# File 'lib/langhelp/langhelp-base.rb', line 257

# Writes one line per extracted link to +out+.
#
# Each line has the form
#   # LABEL<SPACES>(lh-w3m nil HREF)
# followed by a newline, where HREF is the link's href passed through
# +lisp_dump_string+.
#
# @param out [#<<] sink that receives each line via <<
# @return [Array] the collection returned by +links+
def to_e(out)
  links.each do |entry|
    quoted_href = lisp_dump_string(entry.href)
    out << "# #{entry.label}#{SPACES}(lh-w3m nil #{quoted_href})\n"
  end
end