Class: BigSitemap

Inherits:
Object
  • Object
show all
Defined in:
lib/big_sitemap.rb

Instance Method Summary collapse

Constructor Details

#initialize(options) ⇒ BigSitemap

Returns a new instance of BigSitemap.

Raises:

  • (ArgumentError)


8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/big_sitemap.rb', line 8

# Configures the sitemap generator and makes sure the output directory exists.
#
# Every recognised key is removed from +options+ as it is read, so the hash
# passed in by the caller is modified in place.
#
# @param options [Hash] configuration
# @option options [String]  :document_root   directory that the web path is
#   appended to; when absent, falls back to "#{RAILS_ROOT}/public" if
#   RAILS_ROOT is defined, otherwise "#{Merb.root}/public" if Merb is defined
# @option options [String]  :base_url        required; stored as @base_url
# @option options [Integer] :max_per_sitemap (50000)
# @option options [Integer] :batch_size      (1001) must not exceed
#   :max_per_sitemap
# @option options [String]  :path            ('sitemaps') sub-directory of the
#   document root that becomes @file_path
# @option options [Boolean] :ping_google     (true)
# @option options [Boolean] :ping_yahoo      (true)
# @option options [String]  :yahoo_app_id    stored as-is; no default
# @option options [Boolean] :ping_msn        (true)
# @option options [Boolean] :ping_ask        (true)
# @raise [ArgumentError] if no document root can be determined, if :base_url
#   is missing, or if :batch_size is greater than :max_per_sitemap
def initialize(options)
  document_root = options.delete(:document_root)

  # No explicit root: fall back to the framework's public directory.
  if document_root.nil?
    if defined? RAILS_ROOT
      document_root = "#{RAILS_ROOT}/public"
    elsif defined? Merb
      document_root = "#{Merb.root}/public"
    end
  end

  raise ArgumentError, 'Document root must be specified with the :document_root option' if document_root.nil?

  @base_url        = options.delete(:base_url)
  @max_per_sitemap = options.delete(:max_per_sitemap) || 50000
  @batch_size      = options.delete(:batch_size) || 1001 # TODO: Set this to 1000 once DM offset 37000 bug is fixed
  @web_path        = options.delete(:path) || 'sitemaps'
  # The ping flags default to true only when the key is absent or nil, so an
  # explicit false is preserved (a plain `||` would turn false into true).
  @ping_google     = options[:ping_google].nil? ? true : options.delete(:ping_google)
  @ping_yahoo      = options[:ping_yahoo].nil? ? true : options.delete(:ping_yahoo)
  @yahoo_app_id    = options.delete(:yahoo_app_id)
  @ping_msn        = options[:ping_msn].nil? ? true : options.delete(:ping_msn)
  @ping_ask        = options[:ping_ask].nil? ? true : options.delete(:ping_ask)
  @file_path       = "#{document_root}/#{@web_path}"
  @sources         = []

  raise ArgumentError, "Base URL must be specified with the :base_url option" if @base_url.nil?

  raise(
    ArgumentError,
    'Batch size (:batch_size) must be less than or equal to maximum URLs per sitemap (:max_per_sitemap)'
  ) if @batch_size > @max_per_sitemap

  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  Dir.mkdir(@file_path) unless File.exist?(@file_path)
end

Instance Method Details

#add(options) ⇒ Object

Raises:

  • (ArgumentError)


43
44
45
46
47
# File 'lib/big_sitemap.rb', line 43

# Registers a source to be included in the generated sitemaps.
#
# @param options [Hash] source description; must have truthy :model and :path
#   entries. The hash itself (not a copy) is appended to @sources.
# @return [self] so that calls can be chained
# @raise [ArgumentError] if :model or :path is missing
def add(options)
  has_required_keys = options[:model] && options[:path]
  if !has_required_keys
    raise ArgumentError, ':model and :path options must be provided'
  end

  @sources.push(options)
  self
end

#clean ⇒ Object



49
50
51
52
53
54
55
56
57
# File 'lib/big_sitemap.rb', line 49

# Deletes every regular file directly inside @file_path. Sub-directories
# (and the "." / ".." entries) are left alone. Does nothing when @file_path
# is nil.
#
# @return [self] so that calls can be chained
def clean
  return self if @file_path.nil?

  candidates = Dir.entries(@file_path).map { |entry| "#{@file_path}/#{entry}" }
  candidates.each do |path|
    File.delete(path) if File.file?(path)
  end

  self
end

#generate ⇒ Object



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/big_sitemap.rb', line 59

# Writes the gzipped sitemap file(s) for every registered source, then calls
# generate_sitemap_index and ping_search_engines.
#
# For each source the model class is asked for a record count and, when the
# count exceeds @batch_size, the records are fetched in :limit/:offset pages.
# Sitemap number N holds records (N-1)*@max_per_sitemap up to (but excluding)
# N*@max_per_sitemap, so no file exceeds @max_per_sitemap URLs and every
# record lands in exactly one file.
#
# Side effect: stores the number of files written under source[:num_sitemaps]
# in each entry of @sources.
#
# NOTE(review): pick_method, gz_writer, generate_sitemap_index and
# ping_search_engines are defined outside this block; pick_method is assumed
# to return the first listed method the object responds to, or nil — confirm.
#
# @return [self] so that calls can be chained
# @raise [ArgumentError] if a model offers no usable count/find method, or a
#   record offers no to_param/id method
def generate
  @sources.each do |source|
    klass = source[:model]

    count_method = pick_method(klass, [:count_for_sitemap, :count])
    find_method  = pick_method(klass, [:find_for_sitemap, :all])
    raise ArgumentError, "#{klass} must provide a count_for_sitemap class method" if count_method.nil?
    raise ArgumentError, "#{klass} must provide a find_for_sitemap class method" if find_method.nil?

    count = klass.send(count_method)

    # Small sources are fetched with a single unrestricted find call and
    # always fit in one file (batch size never exceeds the per-file maximum).
    paginate     = count > @batch_size
    num_sitemaps = paginate ? (count.to_f / @max_per_sitemap.to_f).ceil : 1

    # Update the @sources hash so that the index file knows how many sitemaps to link to
    source[:num_sitemaps] = num_sitemaps

    base_name = "sitemap_#{Extlib::Inflection::underscore(klass.to_s)}"

    (1..num_sitemaps).each do |sitemap_num|
      # Half-open record range [first_record, last_record) for this file.
      first_record = (sitemap_num - 1) * @max_per_sitemap
      last_record  = [first_record + @max_per_sitemap, count].min

      filename = num_sitemaps > 1 ? "#{base_name}_#{sitemap_num}" : base_name

      # Stream XML output to a file
      gz = gz_writer("#{filename}.xml.gz")

      begin
        xml = Builder::XmlMarkup.new(:target => gz)
        xml.instruct!
        xml.urlset(:xmlns => 'http://www.sitemaps.org/schemas/sitemap/0.9') do
          offset = first_record

          # Runs at least once so that an unpaginated (or empty) source still
          # issues its single find call.
          loop do
            # The final page is trimmed to exactly the records that remain.
            limit        = [@batch_size, last_record - offset].min
            find_options = paginate ? {:limit => limit, :offset => offset} : {}

            klass.send(find_method, find_options).each do |record|
              last_mod_method = pick_method(
                record,
                [:updated_at, :updated_on, :updated, :created_at, :created_on, :created]
              )
              last_mod = last_mod_method.nil? ? Time.now : record.send(last_mod_method)

              param_method = pick_method(record, [:to_param, :id])
              raise ArgumentError, "#{klass} must provide a to_param instance method" if param_method.nil?

              xml.url do
                xml.loc("#{@base_url}/#{source[:path]}/#{record.send(param_method)}")
                xml.lastmod(last_mod.strftime('%Y-%m-%d')) unless last_mod.nil?
                xml.changefreq('weekly')
              end
            end

            offset += @batch_size
            break if offset >= last_record
          end
        end
      ensure
        # Close the writer even when a record raises, so the handle is not leaked.
        gz.close
      end
    end
  end

  generate_sitemap_index
  ping_search_engines
  self # Chainable
end