Class: BigSitemap

Inherits:
Object
  • Object
show all
Includes:
ActionController::UrlWriter
Defined in:
lib/big_sitemap.rb,
lib/big_sitemap/builder.rb

Defined Under Namespace

Classes: Builder

Constant Summary collapse

# Option values merged underneath whatever the caller passes to #initialize.
DEFAULTS = {
  :max_per_sitemap => Builder::MAX_URLS, # must be greater than 1 (checked in #initialize)
  :batch_size      => 1001,              # must not exceed :max_per_sitemap (checked in #initialize)
  :path            => 'sitemaps',        # appended to :document_root to form the output directory
  :gzip            => true,

  # opinionated
  :ping_google => true,
  :ping_yahoo  => false, # needs :yahoo_app_id
  :ping_msn    => false,
  :ping_ask    => false
}

# Candidate method names handed to pick_method to find a model's record counter.
COUNT_METHODS = [:count_for_sitemap, :count]

# Candidate method names handed to pick_method to find a model's record finder.
FIND_METHODS = [:find_for_sitemap, :all]

# Candidate method names handed to pick_method to find a record's last-modified time.
TIMESTAMP_METHODS = [:updated_at, :updated_on, :updated, :created_at, :created_on, :created]

# Candidate method names handed to pick_method to find a record's URL segment.
PARAM_METHODS = [:to_param, :id]

Instance Method Summary collapse

Constructor Details

#initialize(options) ⇒ BigSitemap

Returns a new instance of BigSitemap.



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/big_sitemap.rb', line 27

# Builds a sitemap generator from +options+ merged over DEFAULTS, validates
# the settings, and makes sure the output directory exists.
#
# Options read here:
#   :max_per_sitemap - must be greater than 1
#   :batch_size      - must not exceed :max_per_sitemap
#   :url_options     - Hash merged into the default URL options, or
#   :base_url        - String parsed for host, port and protocol
#                      (one of :url_options / :base_url is required)
#   :document_root   - defaults to "<Rails.root>/public" or "<Merb.root>/public"
#                      when one of those frameworks is loaded
#   :path            - directory under :document_root that receives the files
#
# Raises ArgumentError when any of the above constraints is violated.
def initialize(options)
  @options = DEFAULTS.merge options

  # Use Rails' default_url_options if available
  @default_url_options = defined?(Rails) ? default_url_options : {}

  if @options[:max_per_sitemap] <= 1
    raise ArgumentError, '":max_per_sitemap" must be greater than 1'
  end

  if @options[:url_options]
    @default_url_options.update @options[:url_options]
  elsif @options[:base_url]
    uri = URI.parse(@options[:base_url])
    @default_url_options[:host]     = uri.host
    @default_url_options[:port]     = uri.port
    @default_url_options[:protocol] = uri.scheme
  else
    raise ArgumentError, 'you must specify either ":url_options" hash or ":base_url" string'
  end

  # Note: a batch size equal to :max_per_sitemap is accepted.
  if @options[:batch_size] > @options[:max_per_sitemap]
    raise ArgumentError, '":batch_size" must be less than ":max_per_sitemap"'
  end

  @options[:document_root] ||= begin
    if defined? Rails
      "#{Rails.root}/public"
    elsif defined? Merb
      "#{Merb.root}/public"
    end
  end

  unless @options[:document_root]
    raise ArgumentError, 'Document root must be specified with the ":document_root" option'
  end

  @file_path = "#{@options[:document_root]}/#{strip_leading_slash(@options[:path])}"
  # File.exists? was removed in Ruby 3.2; File.exist? works on every Ruby version.
  Dir.mkdir(@file_path) unless File.exist? @file_path

  @sources       = []
  @sitemap_files = []
end

Instance Method Details

#add(model, options = {}) ⇒ Object



71
72
73
74
75
# File 'lib/big_sitemap.rb', line 71

# Registers +model+ as a source of sitemap URLs.
#
# model   - object whose +to_s+ names the resource.
# options - Hash of per-model settings; :path defaults to the tableized
#           model name. The caller's hash is never modified.
#
# Returns self so calls can be chained.
def add(model, options={})
  # Copy first so the default :path is not written back into the caller's
  # hash (which would also fail on a frozen hash).
  source_options = options.dup
  source_options[:path] ||= Extlib::Inflection.tableize(model.to_s)
  @sources << [model, source_options]
  self
end

#clean ⇒ Object



77
78
79
80
81
82
# File 'lib/big_sitemap.rb', line 77

# Deletes every generated sitemap file (plain or gzipped) found directly
# inside the output directory.
#
# Returns self so calls can be chained.
def clean
  Dir.glob("#{@file_path}/sitemap_*.{xml,xml.gz}").each { |stale| FileUtils.rm(stale) }
  self
end

#generate ⇒ Object



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/big_sitemap.rb', line 84

# Writes the URLs for every source registered with #add, then builds the
# sitemap index.
#
# For each model the records are counted with the first available
# COUNT_METHODS entry and fetched with the first available FIND_METHODS
# entry. When the count exceeds :batch_size the records are fetched in
# consecutive :limit/:offset batches; otherwise a single unrestricted find
# is issued.
#
# Raises ArgumentError when a model offers no usable count or find method.
# Returns self so calls can be chained.
def generate
  @sources.each do |model, options|
    with_sitemap(Extlib::Inflection.tableize(model.to_s)) do |sitemap|
      count_method = pick_method(model, COUNT_METHODS)
      find_method  = pick_method(model, FIND_METHODS)
      raise ArgumentError, "#{model} must provide a count_for_sitemap class method" if count_method.nil?
      raise ArgumentError, "#{model} must provide a find_for_sitemap class method" if find_method.nil?

      count       = model.send(count_method)
      batch_size  = @options[:batch_size]
      num_batches = count > batch_size ? (count.to_f / batch_size.to_f).ceil : 1

      # Everything except the sitemap-specific keys is passed to the finder.
      base_find_options = options.except(:path, :num_items, :priority, :change_frequency)

      # Walk the batches strictly in order. Splitting them into per-sitemap
      # ranges with float arithmetic skipped batches whenever the number of
      # batches was not a multiple of the number of sitemaps; every range fed
      # the same sitemap object anyway.
      1.upto(num_batches) do |batch_num|
        find_options = base_find_options
        if num_batches > 1
          offset = (batch_num - 1) * batch_size
          # The final batch holds exactly the records that are left.
          limit = [batch_size, count - offset].min
          find_options = base_find_options.merge(:limit => limit, :offset => offset)
        end

        model.send(find_method, find_options).each do |record|
          last_mod_method = pick_method(record, TIMESTAMP_METHODS)
          last_mod = last_mod_method.nil? ? Time.now : record.send(last_mod_method)

          param_method = pick_method(record, PARAM_METHODS)

          # Prefer the Rails route; any routing failure falls back to a URL
          # assembled from the configured path and the record's param.
          location = begin
            polymorphic_url(record) if defined?(Rails)
          rescue StandardError
            nil
          end
          location ||= "#{root_url}/#{strip_leading_slash(options[:path])}/#{record.send(param_method)}"

          # :change_frequency and :priority may be per-record callables.
          change_frequency = options[:change_frequency] || 'weekly'
          freq = change_frequency.is_a?(Proc) ? change_frequency.call(record) : change_frequency

          priority = options[:priority]
          pri = priority.is_a?(Proc) ? priority.call(record) : priority

          sitemap.add_url!(location, last_mod, freq, pri)
        end
      end
    end
  end

  generate_sitemap_index

  self
end

#ping_search_engines ⇒ Object



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'lib/big_sitemap.rb', line 141

# Notifies each enabled search engine about the most recently recorded
# sitemap file by issuing an HTTP GET with the escaped sitemap URL.
#
# Engines are controlled by the :ping_google, :ping_yahoo, :ping_msn and
# :ping_ask options. Yahoo additionally needs :yahoo_app_id; without it a
# message is written to $stderr and no request is made.
def ping_search_engines
  require 'net/http'
  require 'cgi'

  escaped = CGI.escape(url_for_sitemap(@sitemap_files.last))

  Net::HTTP.get('www.google.com', "/webmasters/tools/ping?sitemap=#{escaped}") if @options[:ping_google]

  if @options[:ping_yahoo]
    app_id = @options[:yahoo_app_id]
    if app_id
      yahoo_path = "/SiteExplorerService/V1/updateNotification?appid=#{app_id}&url=#{escaped}"
      Net::HTTP.get('search.yahooapis.com', yahoo_path)
    else
      $stderr.puts 'unable to ping Yahoo: no ":yahoo_app_id" provided'
    end
  end

  Net::HTTP.get('webmaster.live.com', "/ping.aspx?siteMap=#{escaped}") if @options[:ping_msn]

  Net::HTTP.get('submissions.ask.com', "/ping?sitemap=#{escaped}") if @options[:ping_ask]
end

#root_url ⇒ Object



171
172
173
174
175
176
177
178
179
# File 'lib/big_sitemap.rb', line 171

# Returns the site root ("protocol://host[:port]") assembled from the
# default URL options. The value is computed once and memoized.
#
# The protocol defaults to "http"; the port is appended only when it is set
# and is not 80.
def root_url
  @root_url ||= begin
    # Work on a copy so the configured protocol string is never modified.
    url = (@default_url_options[:protocol] || 'http').to_s.dup
    url << '://' unless url.include?('://')
    url << @default_url_options[:host]

    # Read the port before using it: with a trailing `if port = ...` the
    # interpolated `port` is parsed as a method call, not the local.
    port = @default_url_options[:port]
    url << ":#{port}" if port && port != 80

    # Return the string explicitly; a trailing modifier-if yields nil
    # whenever no port suffix is added.
    url
  end
end