Class: Anemone::PageStore

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/anemone/page_store.rb

Instance Method Summary collapse

Constructor Details

#initialize(storage = {}, opts) ⇒ PageStore

Returns a new instance of PageStore.



9
10
11
12
# File 'lib/anemone/page_store.rb', line 9

# Builds a PageStore around a storage backend.
#
# storage - object that holds the pages; defaults to an empty Hash. The
#           methods of this class call #[], #[]=, #delete, #has_key? and
#           #merge! on it.
# opts    - options object kept in @opts; #touch_key and #touch_keys read
#           opts[:page_class] from it.
#
# Because the optional parameter comes first, a call with a single argument
# binds that argument to opts and storage falls back to {}.
def initialize(storage = {}, opts)
  @storage = storage
  @opts = opts
end

Instance Method Details

#[](index) ⇒ Object

We typically index the hash with a URI, but convert it to a String for easier retrieval



16
17
18
# File 'lib/anemone/page_store.rb', line 16

# Fetches the entry stored under +index+.
#
# index - usually a URI; it is stringified first because entries are
#         keyed by their String form.
#
# Returns whatever the storage returns for that key.
def [](index)
  key = index.to_s
  @storage[key]
end

#[]=(index, other) ⇒ Object



20
21
22
# File 'lib/anemone/page_store.rb', line 20

# Stores +other+ under the String form of +index+.
#
# index - key object (usually a URI); converted with #to_s.
# other - value to store.
#
# Returns the stored value.
def []=(index, other)
  key = index.to_s
  @storage[key] = other
end

#delete(key) ⇒ Object



24
25
26
# File 'lib/anemone/page_store.rb', line 24

# Removes the entry stored under the String form of +key+.
#
# Returns whatever the storage's #delete returns.
def delete(key)
  string_key = key.to_s
  @storage.delete(string_key)
end

#each_value ⇒ Object



32
33
34
# File 'lib/anemone/page_store.rb', line 32

# Iterates over the store and yields only the value of each entry.
#
# Relies on #each yielding key/value pairs; the key is ignored.
def each_value
  each do |_key, page|
    yield page
  end
end

#has_key?(key) ⇒ Boolean

Returns:

  • (Boolean)


28
29
30
# File 'lib/anemone/page_store.rb', line 28

# Tells whether an entry exists under the String form of +key+.
#
# Returns the result of the storage's own #has_key?.
def has_key?(key)
  string_key = key.to_s
  @storage.has_key?(string_key)
end

#has_page?(url) ⇒ Boolean

Does this PageStore contain the specified URL? HTTP and HTTPS versions of a URL are considered to be the same page.

Returns:

  • (Boolean)


52
53
54
55
56
57
58
59
60
# File 'lib/anemone/page_store.rb', line 52

# Does this PageStore contain the given URL?
#
# For http and https URLs both scheme variants are tried (http first), so
# the two are treated as the same page. Any other scheme is looked up as is.
#
# url - object responding to #scheme (and #dup / #scheme= for http/https),
#       e.g. a URI. The argument itself is not modified; a copy is used.
#
# Returns a Boolean.
def has_page?(url)
  return has_key?(url) unless %w(http https).include?(url.scheme)

  candidate = url.dup
  %w(http https).any? do |scheme|
    candidate.scheme = scheme
    has_key?(candidate)
  end
end

#pages_linking_to(urls) ⇒ Object

If given a single URL (as a String or URI), returns an Array of Pages which link to that URL. If given an Array of URLs, returns a Hash (URI => [Page, Page…]) of Pages linking to those URLs.



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/anemone/page_store.rb', line 112

# Finds the stored pages whose links include the given URL(s).
#
# urls - a single URL (String or URI) or an Array of them. Non-URI entries
#        are parsed with URI(); entries that cannot be parsed are dropped.
#        NOTE: an Array argument is converted in place (elements become URI
#        objects, unparseable entries are removed). This side effect is kept
#        for compatibility with existing callers.
#
# Returns an Array of pages when given a single URL (empty if the URL could
# not be parsed), or a Hash (URI => [Page, Page...]) when given an Array.
def pages_linking_to(urls)
  single = !urls.is_a?(Array)
  urls = [urls] if single

  urls.map! do |url|
    if url.is_a?(URI)
      url
    else
      begin
        URI(url)
      rescue StandardError
        # Unparseable input becomes nil and is removed by compact! below.
        nil
      end
    end
  end
  # Must be the destructive form: plain #compact returns a new array and
  # would leave the nil placeholders (and thus a nil key) behind.
  urls.compact!

  links = {}
  urls.each { |url| links[url] = [] }
  values.each do |page|
    # Iterate the hash rather than urls so a URL that was passed twice does
    # not get the same page appended twice.
    links.each { |url, pages| pages << page if page.links.include?(url) }
  end

  return links unless single

  # urls is empty when the single URL was unparseable.
  links.fetch(urls.first, [])
end

#shortest_paths!(root) ⇒ Object

Use a breadth-first search to calculate the single-source shortest paths from root to all pages in the PageStore



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/anemone/page_store.rb', line 66

# Breadth-first search starting at +root+. Every stored, fetched page that is
# reachable through page links gets its depth set (root is 0) and is marked
# as visited.
#
# root - String or URI of the start page; a String is converted with URI().
#
# Raises RuntimeError ("Root node not found") if root is not a stored key.
# Returns self.
def shortest_paths!(root)
  root = URI(root) if root.is_a?(String)
  raise "Root node not found" if !has_key?(root)

  # FIFO of URLs whose outgoing links still have to be examined.
  q = Queue.new

  q.enq root
  root_page = self[root]
  root_page.depth = 0
  root_page.visited = true
  # Store the modified page again (presumably because some storage backends
  # hand out copies rather than live objects -- confirm).
  self[root] = root_page
  while !q.empty?
    page = self[q.deq]
    page.links.each do |u|
      begin
        link = self[u]
        # Skip links that are not stored, were never fetched, or were already visited.
        next if link.nil? || !link.fetched? || link.visited

        # Redirect pages are not enqueued; their target is handled by the redo below.
        q << u unless link.redirect?
        link.visited = true
        link.depth = page.depth + 1
        self[u] = link

        if link.redirect?
          # Re-run this same iteration with u pointing at the redirect target,
          # so the target is processed against the same parent +page+ and
          # receives the same depth as the redirect page itself.
          u = link.redirect_to
          redo
        end
      end
    end
  end

  self
end

#touch_key(key) ⇒ Object



42
43
44
# File 'lib/anemone/page_store.rb', line 42

# Creates a page for +key+ and stores it under that key.
#
# The page is built with the class found in @opts[:page_class], which is
# instantiated with +key+ as its only argument.
#
# Returns the newly created page.
def touch_key(key)
  page_class = @opts[:page_class]
  self[key] = page_class.new(key)
end

#touch_keys(keys) ⇒ Object



46
47
48
# File 'lib/anemone/page_store.rb', line 46

# Creates a page for every key and merges them all into the storage in a
# single #merge! call.
#
# keys - Enumerable of keys; each page is built by instantiating
#        @opts[:page_class] with the key and is stored under key.to_s.
#
# Returns the result of the storage's #merge!.
def touch_keys(keys)
  fresh_pages = keys.each_with_object({}) do |key, pages|
    pages[key.to_s] = @opts[:page_class].new(key)
  end
  @storage.merge!(fresh_pages)
end

#uniq! ⇒ Object

Removes all Pages from storage where redirect? is true



103
104
105
106
# File 'lib/anemone/page_store.rb', line 103

# Removes all pages from storage for which redirect? is true.
#
# The URLs to remove are collected first and deleted afterwards, so the
# storage is never modified while #each_value is still walking it (deleting
# mid-iteration can make cursor-style backends skip entries).
#
# Returns self.
def uniq!
  redirect_urls = []
  each_value { |page| redirect_urls << page.url if page.redirect? }
  redirect_urls.each { |url| delete(url) }
  self
end

#urls_linking_to(urls) ⇒ Object

If given a single URL (as a String or URI), returns an Array of URLs which link to that URL. If given an Array of URLs, returns a Hash (URI => [URI, URI…]) of URLs linking to those URLs.



144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/anemone/page_store.rb', line 144

# Finds the URLs of the stored pages that link to the given URL(s).
#
# urls - a single URL (String or URI) or an Array of them; passed on to
#        #pages_linking_to as an Array.
#
# Returns an Array of page URLs when given a single URL (empty when nothing
# was found), or a Hash (URI => [URI, URI...]) when given an Array.
def urls_linking_to(urls)
  single = !urls.is_a?(Array)
  urls = [urls] if single

  url_links = {}
  pages_linking_to(urls).each do |url, pages|
    url_links[url] = pages.map { |page| page.url }
  end

  return url_links unless single

  # A single URL yields at most one entry; take it directly instead of
  # looking it up through the argument array, which would only work if
  # #pages_linking_to had converted that array in place.
  url_links.values.first || []
end

#values ⇒ Object



36
37
38
39
40
# File 'lib/anemone/page_store.rb', line 36

# Collects the value of every entry in the store.
#
# Relies on #each yielding key/value pairs; the key is ignored.
#
# Returns a new Array of the values in iteration order.
def values
  [].tap do |pages|
    each { |_key, page| pages << page }
  end
end