Class: SiteData

Inherits:
Struct
  • Object
show all
Defined in:
lib/w3m-autopagerize-server.rb

Defined Under Namespace

Modules: FallbackSetup

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#blockObject

Returns the value of attribute block

Returns:

  • (Object)

    the current value of block



38
39
40
# File 'lib/w3m-autopagerize-server.rb', line 38

# Reader for the +block+ attribute.
#
# When set, #next_url runs it via instance_exec, passing the page URI and
# the +match+ attribute.
#
# @return [Object] the current value of block
def block
  @block
end

#exampleUrlObject

Returns the value of attribute exampleUrl

Returns:

  • (Object)

    the current value of exampleUrl



38
39
40
# File 'lib/w3m-autopagerize-server.rb', line 38

# Reader for the +exampleUrl+ attribute.
#
# @return [Object] the current value of exampleUrl
def exampleUrl
  @exampleUrl
end

#insertBeforeObject

Returns the value of attribute insertBefore

Returns:

  • (Object)

    the current value of insertBefore



38
39
40
# File 'lib/w3m-autopagerize-server.rb', line 38

# Reader for the +insertBefore+ attribute.
#
# @return [Object] the current value of insertBefore
def insertBefore
  @insertBefore
end

#matchObject

Returns the value of attribute match

Returns:

  • (Object)

    the current value of match



38
39
40
# File 'lib/w3m-autopagerize-server.rb', line 38

# Reader for the +match+ attribute.
#
# #next_url hands this value to +block+ as its second argument.
#
# @return [Object] the current value of match
def match
  @match
end

#nextLinkObject

Returns the value of attribute nextLink

Returns:

  • (Object)

    the current value of nextLink



38
39
40
# File 'lib/w3m-autopagerize-server.rb', line 38

# Reader for the +nextLink+ attribute.
#
# #next_url evaluates this value as an XPath expression against the cached
# document when it is set.
#
# @return [Object] the current value of nextLink
def nextLink
  @nextLink
end

#pageElementObject

Returns the value of attribute pageElement

Returns:

  • (Object)

    the current value of pageElement



38
39
40
# File 'lib/w3m-autopagerize-server.rb', line 38

# Reader for the +pageElement+ attribute.
#
# @return [Object] the current value of pageElement
def pageElement
  @pageElement
end

Class Method Details

.fallback_predicate1(text, words = $FALLBACK_WORDS, patterns = $FALLBACK_PATTERNS) ⇒ Object



40
41
42
43
44
45
46
47
# File 'lib/w3m-autopagerize-server.rb', line 40

# Builds an XPath predicate body that tests +text+ against a list of exact
# words and a list of substrings.
#
# Each word yields a <tt>text="word"</tt> clause and each pattern yields a
# <tt>contains(text,"pattern")</tt> clause; all clauses are joined with
# " or ". An empty string is returned when both lists are empty.
#
# @param text [String] XPath expression to test (e.g. "." or "@value")
# @param words [Enumerable] values +text+ must equal exactly
# @param patterns [Enumerable] substrings +text+ may contain
# @return [String] the " or "-joined predicate
def self.fallback_predicate1(text, words = $FALLBACK_WORDS, patterns = $FALLBACK_PATTERNS)
  exact_clauses = words.map { |word| %Q!#{text}="#{word}"! }.join(' or ')
  contains_clauses = patterns.map { |pattern| %Q!contains(#{text},"#{pattern}")! }.join(' or ')

  # Skip a group that produced nothing so no dangling " or " is emitted.
  [exact_clauses, contains_clauses].reject { |group| group == "" }.join(" or ")
end

.fallback_predicate2(text, start_words = $FALLBACK_START_WORDS) ⇒ Object



49
50
51
# File 'lib/w3m-autopagerize-server.rb', line 49

# Builds an XPath predicate body that tests whether +text+ starts with any
# of the given words.
#
# Each word yields a <tt>starts-with(text,"word")</tt> clause; clauses are
# joined with " or ".
#
# @param text [String] XPath expression to test (e.g. "." or "@value")
# @param start_words [Array, nil] prefixes to look for; nil is treated as
#   an empty list
# @return [String] the " or "-joined predicate, or "" when there are no
#   start words
def self.fallback_predicate2(text, start_words = $FALLBACK_START_WORDS)
  # $FALLBACK_START_WORDS may be unset (nil); .fallbacks guards it with the
  # same #to_a coercion, so do likewise here instead of raising on nil.map.
  start_words.to_a.map { |w| %Q!starts-with(#{text},"#{w}")! }.join(' or ')
end

.fallbacksObject

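Returns the memoized list of fallback entries, built from XPath expressions that match a link or form leading to the next page.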



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/w3m-autopagerize-server.rb', line 54

# Returns the fallback entries, building them on first use and caching the
# result in @fallbacks.
#
# Two entries are always created from fallback_predicate1: one matching
# <a> elements and one matching <form> elements with a descendant <input>.
# When $FALLBACK_START_WORDS is non-empty, two more entries with the same
# shapes are appended using fallback_predicate2. The resulting array is
# extended with FallbackSetup before being returned.
#
# @return [Array] the cached array of fallback entries
def self.fallbacks
  @fallbacks ||= begin
    rules = [
      new("//a[#{fallback_predicate1('.')}]"),
      new("//form[descendant::input[#{fallback_predicate1('@value')}]]"),
    ]
    # Start-word rules are optional: the global may be nil or empty.
    unless $FALLBACK_START_WORDS.to_a.empty?
      rules.push(
        new("//a[#{fallback_predicate2('.')}]"),
        new("//form[descendant::input[#{fallback_predicate2('@value')}]]"),
      )
    end
    rules.extend(FallbackSetup)
  end
end

Instance Method Details

#next_url(uri) ⇒ Object



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/w3m-autopagerize-server.rb', line 89

# Computes the URL of the page that follows +uri+.
#
# If +block+ is set it is always run first (via instance_exec, with the
# parsed URI and +match+ as arguments). Its result is only used when
# +nextLink+ is not set. When +nextLink+ is set, it is evaluated as an
# XPath expression against the document stored in $nokogiri_cache under
# the URI string, and the first matching node's "href", "action" or
# "value" attribute (in that order) is taken as the next location.
#
# @param uri [#to_s] the URL of the current page
# @return [URI, nil] the next location resolved against +uri+, or nil when
#   no next location could be determined
def next_url(uri)
  uri = URI(uri.to_s)
  result = instance_exec(uri, match, &block) if block
  xpath = nextLink

  unless xpath
    $logger.info "#{__method__}: result = #{result}"
    # Without a block result there is nothing to follow; URI#merge would
    # raise on nil, so report "no next page" the same way the XPath branch does.
    return result ? uri.merge(result) : nil
  end

  nokogiri = $nokogiri_cache[uri.to_s]
  $logger.info "#{__method__}: use xpath #{xpath}"
  nodes = nokogiri.xpath(xpath)
  node = nodes.first
  $logger.debug "#{__method__}: nodes.length = #{nodes.length}"
  # node is nil when the XPath matched nothing; guard explicitly rather than
  # swallowing every error with a rescue modifier.
  nexturl = node && (node["href"] || node["action"] || node["value"])
  $logger.info "#{__method__}: nexturl = #{nexturl || 'NOT FOUND'}"
  return unless nexturl

  # Some buggy sites do not encode spaces. Use the non-mutating #tr so the
  # attribute value owned by the document is left untouched.
  uri.merge nexturl.tr(' ', '+')
end