Class: WebmasterTools

Inherits:
Object
  • Object
show all
Defined in:
lib/webmaster_tools.rb

Overview

WebmasterTools requires two constructor arguments:

username  - Google account username or email address
password  - the account password, in plain text

Constant Summary collapse

# URL templates used by the client. Every "%s" is filled in with String#%.
# All of them are frozen so a caller cannot mutate a shared template in place.

# Page fetched by #login to obtain the sign-in form.
LOGIN = "https://accounts.google.com/ServiceLogin?service=sitemaps".freeze
# #login treats a response body that contains this URL as a failed sign-in.
AUTH = "https://accounts.google.com/ServiceLoginAuth".freeze
# Dashboard page read by #dashboard; %s is the CGI-escaped site URL.
DASHBOARD = "https://www.google.com/webmasters/tools/dashboard?hl=en&siteUrl=%s".freeze
# Crawl statistics page read by #crawl_stats; %s is the CGI-escaped site URL.
STATS = "https://www.google.com/webmasters/tools/crawl-stats?hl=en&siteUrl=%s".freeze
# HTML suggestions page read by #suggest_counts; %s is the CGI-escaped site URL.
SUGGESTS = "https://www.google.com/webmasters/tools/html-suggestions?hl=en&siteUrl=%s".freeze
# Removal request form used by #remove_url; placeholders are siteUrl and urlt.
REMOVAL = "https://www.google.com/webmasters/tools/removals-request?hl=en&siteUrl=%s&urlt=%s".freeze
# Removal listing read by #removal_stats; placeholders are siteUrl, urlt and
# grid.s (filled with max_results).
REMOVALS = "https://www.google.com/webmasters/tools/url-removal?hl=en&siteUrl=%s&urlt=%s&rlf=all&grid.r=0&grid.s=%s".freeze
# Base URL of the GWT endpoints; not referenced by the methods shown alongside it.
GWT_URL = "https://www.google.com/webmasters/tools/gwt/".freeze
# Per-feature settings, keyed by feature symbol (:info for sitemaps, :error
# for crawl errors). Each entry holds:
#   :action - an action name string
#   :perm   - a hex identifier string (identical for both features)
#   :data   - a pipe-delimited request payload template with two %s placeholders
#   :dl     - download URL template; #crawl_info and #crawl_error_counts fill it
#             with the CGI-escaped site URL and a security token
# NOTE(review): :action, :perm and :data are not read by the methods shown
# alongside this constant; presumably security_token consumes them — confirm.
GWT =
{
  # Disabled site-selector entry, kept for reference only.
  # :select => {
  #   :action => "SITE_SELECTOR",
  #   :perm   => "3E83D794404733556D909F0916E6641E",
  #   :data   => '7|0|13|%s|FCC81D20B05EEB177130C930CD8B412E|com.google.crawl.wmconsole.fe.feature.gwt.common.shared.siteselector.SiteSelectorService|getAllSites|com.google.crawl.wmconsole.fe.feature.gwt.base.shared.FeatureContext/101412349|java.lang.String/2004016611|/webmasters/tools|{"currentSiteName":"testscloud-sitemaps.cloudservice-sitemap.hoostings.com","recentSiteUrls":["https://www.google.com/webmasters/tools/sitemap-list?hl=en&siteUrl=http://testscloud.com/","https://www.google.com/webmasters/tools/sitemap-list?hl=en&siteUrl=http://www.testscloud.com/","https://www.google.com/webmasters/tools/sitemap-list?hl=en&siteUrl=http://testscloud-sitemaps.s3.hoostings.com/","https://www.google.com/webmasters/tools/sitemap-list?hl=en&siteUrl=http://m.testscloud.com/","https://www.google.com/webmasters/tools/sitemap-list?hl=en&siteUrl=http://sandbox-testscloud.com/"],"hasMultipleSites":true,"siteFaviconUrl":"//s2.googleusercontent.com/s2/favicons?domain_url=http://testscloud-sitemaps.cloudservice-sitemap.hoostings.com/","recentSiteNames":["testscloud.com","www.testscloud.com","testscloud-sitemaps.s3.hoostings.com","m.testscloud.com","sandbox-testscloud.com"]}|com.google.crawl.wmconsole.fe.feature.gwt.config.FeatureKey/497977451|en|http://testscloud-sitemaps.cloudservice-sitemap.hoostings.com/|com.google.crawl.wmconsole.fe.base.PermissionLevel/2330262508|https://www.google.com/webmasters/tools/sitemap-list?hl=en&siteUrl=http://testscloud-sitemaps.cloudservice-sitemap.hoostings.com/#MAIN_TAB=0&CARD_TAB=-1|1|2|3|4|2|5|6|5|7|8|9|5|10|11|12|5|13|',
  #   :dl     => "https://www.google.com/webmasters/tools/sitemaps-dl?hl=en&siteUrl=%s&security_token=%s",
  # },
  # Sitemaps feature; :dl is the download read by #crawl_info.
  :info => {
    :action => "SITEMAPS_READ",
    :perm   => "3E83D794404733556D909F0916E6641E",
    :data   => "7|0|11|%s|0DD967D4FC5CC1A0702DC7ECFB48549A|com.google.crawl.wmconsole.fe.feature.gwt.sitemaps.shared.SitemapsService|getDataForMainPage|com.google.crawl.wmconsole.fe.feature.gwt.base.shared.FeatureContext/101412349|Z|/webmasters/tools|com.google.crawl.wmconsole.fe.feature.gwt.config.FeatureKey/497977451|en|%s|com.google.crawl.wmconsole.fe.base.PermissionLevel/2330262508|1|2|3|4|3|5|6|6|5|7|0|8|6|9|10|11|5|1|0|",
    :dl     => "https://www.google.com/webmasters/tools/sitemaps-dl?hl=en&siteUrl=%s&security_token=%s",
  },
  # Crawl errors feature; :dl is the download read by #crawl_error_counts.
  :error => {
    :action => "CRAWLERRORS_READ",
    :perm   => "3E83D794404733556D909F0916E6641E",
    :data   => "7|0|10|%s|5ED7DB19A1883A7245AB65FD59F043C3|com.google.crawl.wmconsole.fe.feature.gwt.crawlerrors.shared.CrawlErrorsService|getSiteLevelData|com.google.crawl.wmconsole.fe.feature.gwt.base.shared.FeatureContext/101412349|/webmasters/tools|com.google.crawl.wmconsole.fe.feature.gwt.config.FeatureKey/497977451|en|%s|com.google.crawl.wmconsole.fe.base.PermissionLevel/2330262508|1|2|3|4|1|5|5|6|0|7|1|8|9|10|5|",
    :dl     => "https://www.google.com/webmasters/tools/crawl-errors-new-dl?hl=en&siteUrl=%s&security_token=%s",
  }
}

Instance Method Summary collapse

Constructor Details

#initialize(username, password) ⇒ WebmasterTools

Returns a new instance of WebmasterTools.



43
44
45
# File 'lib/webmaster_tools.rb', line 43

# Builds a client and signs in straight away by delegating to #login.
#
# @param username [String] Google username or email
# @param password [String] password in plaintext
# @raise [RuntimeError] when #login rejects the credentials
def initialize(username, password)
  login(username, password)
end

Instance Method Details

#crawl_error_counts(url, split = false) ⇒ Object



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/webmaster_tools.rb', line 147

# Downloads the crawl-errors export for a site and counts errors per category.
#
# The download is split on newlines and commas and its first row (the column
# headers) is discarded. Rows whose category column is blank, or contains "/"
# or "%", are skipped.
#
# @param url [String] site URL; passed through norm_url before use
# @param split [Boolean] when true, nest the counts under the detection date
# @return [Hash] to_key(category) => count, or
#   detected date => { to_key(category) => count } when +split+ is true;
#   entries are ordered by key. Empty when the download has no data rows.
def crawl_error_counts(url, split = false)
  url   = norm_url(url)
  token = security_token(:error, url)
  page  = agent.get(GWT[:error][:dl] % [CGI::escape(url), token])

  # drop(1) discards the header row and, unlike shift + map on the result,
  # does not raise NoMethodError when the download is empty.
  rows = page.content.split("\n").drop(1).map { |line| line.split(",") }

  counts = rows.inject({}) do |hash, row|
    # Column 3 is the detection date, column 4 the category.
    detected, category = row.values_at(3, 4)
    next hash if category.to_s.empty? || category =~ %r{[/%]}

    # Rewrite "m/d/yy"-shaped dates as "20yy-mm-dd"; anything else is kept as is.
    if (date = %r{(\d{1,2})/(\d{1,2})/(\d{2})}.match(detected.to_s))
      detected = "20%s-%02d-%02d" % [date[3], date[1].to_i, date[2].to_i]
    end

    bucket = split ? (hash[detected] ||= {}) : hash
    key    = to_key(category)
    bucket[key] = bucket.fetch(key, 0) + 1
    hash
  end

  Hash[counts.sort_by { |key, _| key }]
end

#crawl_info(url) ⇒ Object



132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/webmaster_tools.rb', line 132

# Downloads the sitemaps export for a site and returns one hash per data row.
#
# The download is split on newlines and commas. The first row supplies the
# keys: each header is downcased, spaces become underscores, and the result is
# converted to a symbol. Values are left as strings.
#
# @param url [String] site URL; passed through norm_url before use
# @return [Array<Hash>] one hash per row after the header; empty when the
#   download contains no lines at all
def crawl_info(url)
  url   = norm_url(url)
  token = security_token(:info, url)
  page  = agent.get(GWT[:info][:dl] % [CGI::escape(url), token])

  header, *rows = page.content.split("\n").map { |line| line.split(",") }
  # An empty download has no header row; return no records instead of
  # raising NoMethodError on nil.
  return [] if header.nil?

  keys = header.map { |name| name.downcase.gsub(' ', '_').to_sym }
  rows.map { |row| Hash[keys.zip(row)] }
end

#crawl_stats(url) ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/webmaster_tools.rb', line 66

# Reads the crawl statistics page for a site.
#
# Every ".hostload-activity tr td" cell is reduced to its digits and converted
# to an integer. The numbers are grouped three at a time into
# { :high, :avg, :low } hashes, which are then paired in order with :pages,
# :kilobytes and :milliseconds.
#
# @param url [String] site URL; passed through norm_url and CGI-escaped
# @return [Hash] :pages / :kilobytes / :milliseconds => { :high, :avg, :low }
def crawl_stats(url)
  escaped = CGI::escape(norm_url(url))
  page    = agent.get(STATS % escaped)

  numbers = page.search(".hostload-activity tr td").map do |cell|
    cell.text.gsub(/\D/, '').to_i
  end

  summaries = numbers.each_slice(3).map do |triple|
    Hash[[:high, :avg, :low].zip(triple)]
  end

  Hash[[:pages, :kilobytes, :milliseconds].zip(summaries)]
end

#dashboard(url) ⇒ Object



56
57
58
59
60
61
62
# File 'lib/webmaster_tools.rb', line 56

# Reads the dashboard page for a site and returns one entry per
# "#sitemap tbody .rightmost" node, holding the digits of that node's text
# as an integer.
#
# @param url [String] site URL; passed through norm_url and CGI-escaped
# @return [Array<Hash>] hashes of the form { :indexed_web => Integer }
def dashboard(url)
  site  = CGI::escape(norm_url(url))
  cells = agent.get(DASHBOARD % site).search("#sitemap tbody .rightmost")

  cells.map do |cell|
    digits = cell.text.gsub(/\D/, '')
    { :indexed_web => digits.to_i }
  end
end

#login(username, password) ⇒ Object



47
48
49
50
51
52
53
54
# File 'lib/webmaster_tools.rb', line 47

# Signs in by fetching the LOGIN page, filling in the Email and Passwd fields
# of its form and submitting it.
#
# @param username [String] Google username or email
# @param password [String] password in plaintext
# @return [nil]
# @raise [RuntimeError] when the response body still contains the AUTH URL,
#   which is treated as a rejected username/password
def login(username, password)
  sign_in_page = agent.get(LOGIN)

  form = sign_in_page.form
  form.Email  = username
  form.Passwd = password

  result = agent.submit(form)
  raise "Wrong username + password combination" if result.content.include?(AUTH)
end

#removal_stats(url, max_results = 100) ⇒ Object



103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/webmaster_tools.rb', line 103

# Lists the URL removal requests shown on the removals page for a site.
#
# Each ".grid tr" row that has a first <td> becomes one hash. The status is
# taken from the first word of the ".removal-status" text and reduced to
# 'Removed', 'Denied' or 'Pending'; anything else, including a blank status
# cell, is reported as 'Unknown'.
#
# @param url [String] site URL; passed through norm_url and CGI-escaped
# @param max_results [Integer] value substituted for grid.s in REMOVALS
# @return [Array<Hash>] hashes with :url, :status, :type and :date strings
def removal_stats(url, max_results = 100)
  site = CGI::escape norm_url(url)
  # NOTE(review): urlt receives the already-escaped site URL escaped a second
  # time, whereas #remove_url escapes its urlt once. Kept as is — confirm.
  page = agent.get(REMOVALS % [site, CGI::escape(site), max_results])

  page.search('.grid tr').map do |row|
    # Rows without a first data cell carry no removal entry.
    next if row.at("td[1]").nil?

    cells = row.search('td')
    # to_s: a blank status cell yields no first word; treat it as unknown
    # instead of raising NoMethodError on nil.
    word   = row.search(".removal-status").text.strip.split(' ')[0].to_s
    status = %w(Removed Denied Pending).find { |known| word.include?(known) } || 'Unknown'

    {
      :url    => row.search('.wmt-external-url').text.strip,
      :status => status,
      :type   => cells[2].text.strip,
      :date   => cells[3].text.strip
    }
  end.compact
end

#remove_url(url_with_file, removal_type = "PAGE") ⇒ Object

Possible Removal Types are: [“PAGE”, “PAGE_CACHE”, “DIRECTORY”]



93
94
95
96
97
98
99
100
101
# File 'lib/webmaster_tools.rb', line 93

# Submits a removal request for a URL and checks that it shows up afterwards.
#
# Loads the REMOVAL form, sets its 'removalmethod' field to +removal_type+ and
# submits it. The request counts as accepted when the basename of
# +url_with_file+ matches the basename of some ".wmt-external-url" entry on
# the resulting page.
#
# @param url_with_file [String] full URL of the page to remove
# @param removal_type [String] one of "PAGE", "PAGE_CACHE", "DIRECTORY"
# @return [nil]
# @raise [RuntimeError] "could not submit URL" when no entry matches
def remove_url(url_with_file, removal_type = "PAGE")
  site         = CGI::escape(norm_url(url_with_file))
  request_page = agent.get(REMOVAL % [site, CGI::escape(url_with_file)])

  method_field = request_page.form.field_with(:name => 'removalmethod')
  method_field.value = removal_type
  listing = agent.submit(request_page.form)

  listed_names = listing.search(".wmt-external-url").map do |node|
    File.basename(node.text)
  end
  wanted = File.basename(url_with_file)
  raise "could not submit URL" unless listed_names.include?(wanted)
end

#suggest_counts(url) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
# File 'lib/webmaster_tools.rb', line 80

# Reads the HTML suggestions page for a site and maps each suggestion to the
# value shown in its ".pages" element.
#
# Only ".g-section tr" rows that contain both a link and a ".pages" element
# contribute; the link text goes through to_key and the pages text through
# to_value.
#
# @param url [String] site URL; passed through norm_url and CGI-escaped
# @return [Hash] to_key(link text) => to_value(pages text)
def suggest_counts(url)
  site = CGI::escape(norm_url(url))
  rows = agent.get(SUGGESTS % site).search(".g-section tr")

  rows.each_with_object({}) do |row, counts|
    link = row.search("a").first
    next unless link

    pages = row.search(".pages").first
    next unless pages

    counts[to_key(link.text)] = to_value(pages.text)
  end
end