Module: Murlsh::UriAsk
- Defined in:
- lib/murlsh/uri_ask.rb
Overview
URI mixin.
Constant Summary collapse
- HtmlContentTypePattern =
%r{^text/html}i
Instance Method Summary collapse
-
#content_length(options = {}) ⇒ Object
Get the content length.
-
#content_type(options = {}) ⇒ Object
Get the content type.
-
#decode(s) ⇒ Object
Convert from the character set of this url to utf-8 and decode HTML entities.
-
#default_headers ⇒ Object
Default headers sent with the request.
-
#description(options = {}) ⇒ Object
Get the HTML meta description.
-
#doc(options = {}) ⇒ Object
Get the parsed doc at this url.
-
#get_headers(options = {}) ⇒ Object
Get and cache response headers returned by HTTP GET for this URI.
-
#head_headers(options = {}) ⇒ Object
Get and cache response headers returned by HTTP HEAD for this URI.
-
#header(header_name, options = {}) ⇒ Object
Get the value of a response header.
-
#html?(options = {}) ⇒ Boolean
Return true if the content type is HTML.
-
#title(options = {}) ⇒ Object
Get the HTML title.
Instance Method Details
#content_length(options = {}) ⇒ Object
Get the content length.
Options:
-
:failproof - if true hide all exceptions and return empty string on failure
-
:headers - hash of headers to send in request
20 |
# File 'lib/murlsh/uri_ask.rb', line 20
# Get the content length.
#
# Thin wrapper that asks #header for the 'content-length' response header.
#
# Options (handed to #header unchanged):
# * :failproof - if true hide all exceptions and return empty string on failure
# * :headers - hash of headers to send in request
def content_length(options = {})
  header('content-length', options)
end
#content_type(options = {}) ⇒ Object
Get the content type.
Options:
-
:failproof - if true hide all exceptions and return empty string on failure
-
:headers - hash of headers to send in request
27 |
# File 'lib/murlsh/uri_ask.rb', line 27
# Get the content type.
#
# Thin wrapper that asks #header for the 'content-type' response header.
#
# Options (handed to #header unchanged):
# * :failproof - if true hide all exceptions and return empty string on failure
# * :headers - hash of headers to send in request
def content_type(options = {})
  header('content-type', options)
end
#decode(s) ⇒ Object
Convert from the character set of this url to utf-8 and decode HTML entities.
116 117 118 |
# File 'lib/murlsh/uri_ask.rb', line 116
# Convert from the character set of this url to utf-8 and decode HTML
# entities.
#
# s - the string to convert.
#
# The source character set is read from @charset, which #doc assigns after
# parsing the page; this method does not set it itself.
def decode(s)
  utf8 = Iconv.conv('utf-8', @charset, s)
  HTMLEntities.new.decode(utf8)
end
#default_headers ⇒ Object
Default headers sent with the request.
97 98 99 100 101 102 103 104 105 106 107 |
# File 'lib/murlsh/uri_ask.rb', line 97
# Default headers sent with the request.
#
# Always contains a browser-like User-Agent. When the host starts with
# www.nytimes.com a Referer header pointing at news.google.com is added as
# well.
#
# Returns a new Hash on every call.
def default_headers
  result = {
    'User-Agent' =>
      'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
  }

  # \A anchors at the start of the string; ^ would also match after a newline.
  if host.to_s[/\Awww\.nytimes\.com/]
    result['Referer'] = 'http://news.google.com/'
  end

  result
end
#description(options = {}) ⇒ Object
Get the HTML meta description.
Options:
-
:failproof - if true hide all exceptions and return empty string on failure
-
:headers - hash of headers to send in request
51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/murlsh/uri_ask.rb', line 51
# Get the HTML meta description.
#
# The result is computed once and cached in @description; later calls return
# the cached value regardless of the options passed. When no parsed document
# is available, or its description is nil or empty, the result is ''.
#
# Options (handed to #doc unchanged):
# * :failproof - if true hide all exceptions and return empty string on failure
# * :headers - hash of headers to send in request
def description(options = {})
  return @description if defined?(@description)

  # Set the fallback first so the value is cached even when #doc yields nothing.
  @description = ''

  d = doc(options)
  if d and d.description and not d.description.empty?
    @description = decode(d.description)
  end

  @description
end
#doc(options = {}) ⇒ Object
Get the parsed doc at this url.
Doc can be an Hpricot or Nokogiri doc or anything that supports the methods in Murlsh::Doc.
Options:
-
:failproof - if true hide all exceptions and return empty string on failure
-
:headers - hash of headers to send in request
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/murlsh/uri_ask.rb', line 73
# Get the parsed doc at this url.
#
# Doc can be an Hpricot or Nokogiri doc or anything that supports the
# methods in Murlsh::Doc.
#
# The result (possibly nil) is cached in @doc. The page is only fetched when
# #html? reports an HTML content type; otherwise nil is cached and returned.
# After a successful parse, @charset is set from the document's charset,
# falling back to the charset reported by the opened resource.
#
# NOTE: the merged request headers are written back into the caller's
# options hash under :headers.
#
# Options:
# * :failproof - if true hide all exceptions and return empty string on failure
# * :headers - hash of headers to send in request
def doc(options = {})
  return @doc if defined?(@doc)

  # Explicit :headers take precedence over the defaults.
  options[:headers] = default_headers.merge(options.fetch(:headers, {}))

  @doc = nil
  if html?(options)
    Murlsh::failproof(options) do
      self.open(options[:headers]) do |f|
        # The first registered 'html_parse' plugin wins; Nokogiri is the
        # fallback when none is registered.
        html_parse_plugins = Murlsh::Plugin.hooks('html_parse')
        @doc = if html_parse_plugins.empty?
          Nokogiri(f).extend(Murlsh::Doc)
        else
          html_parse_plugins.first.run(f).extend(Murlsh::Doc)
        end

        @charset = @doc.charset || f.charset
      end
    end
  end

  @doc
end
#get_headers(options = {}) ⇒ Object
Get and cache response headers returned by HTTP GET for this URI.
Return hash values are single strings.
Options:
-
:failproof - if true hide all exceptions and return empty hash on failure
-
:headers - hash of headers to send in request
166 167 168 169 170 171 172 173 174 175 176 177 178 |
# File 'lib/murlsh/uri_ask.rb', line 166 def get_headers(={}) return @get_headers if defined?(@get_headers) request_headers = default_headers.merge(.fetch(:headers, {})) response_headers = {} # use open-uri instead of Net::HTTP because it handles redirects Murlsh::failproof() do response_headers = self.open(request_headers) { |f| f. } end @get_headers = response_headers end |
#head_headers(options = {}) ⇒ Object
Get and cache response headers returned by HTTP HEAD for this URI.
Return hash values are lists.
Options:
-
:failproof - if true hide all exceptions and return empty hash on failure
-
:headers - hash of headers to send in request
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# File 'lib/murlsh/uri_ask.rb', line 138
# Get and cache response headers returned by HTTP HEAD for this URI.
#
# Return hash values are lists.
#
# The result is cached in @head_headers. Headers are only taken from a
# successful (Net::HTTPSuccess) response; in every other case the cached
# value is the empty hash.
#
# Options:
# * :failproof - if true hide all exceptions and return empty hash on failure
# * :headers - hash of headers to send in request
def head_headers(options = {})
  return @head_headers if defined?(@head_headers)

  # Explicit :headers take precedence over the defaults.
  request_headers = default_headers.merge(options.fetch(:headers, {}))

  response_headers = {}

  Murlsh::failproof(options) do
    http = Net::HTTP.new(host, port)
    http.use_ssl = (scheme == 'https')

    # Presumably supplies get_path_query used below - confirm against
    # Murlsh::URIGetPathQuery.
    extend(Murlsh::URIGetPathQuery)
    resp = http.request_head(get_path_query, request_headers)

    if Net::HTTPSuccess === resp
      response_headers = resp.to_hash
    end
  end

  @head_headers = response_headers
end
#header(header_name, options = {}) ⇒ Object
Get the value of a response header.
Options:
-
:failproof - if true hide all exceptions and return empty string on failure
-
:headers - hash of headers to send in request
125 126 127 128 129 |
# File 'lib/murlsh/uri_ask.rb', line 125
# Get the value of a response header.
#
# The HEAD response is consulted first; its values are lists, so the first
# element is used. When that value is missing or empty the GET response
# headers are consulted instead. The result is always a String: a header
# found in neither response yields ''.
#
# header_name - name of the header to look up, as keyed in the response
#               hashes.
#
# Options (handed to #head_headers and #get_headers unchanged):
# * :failproof - if true hide all exceptions and return empty string on failure
# * :headers - hash of headers to send in request
def header(header_name, options = {})
  result = [*head_headers(options)[header_name]][0]

  # Only fall back to GET when HEAD gave nothing usable.
  if not result or result.empty?
    result = get_headers(options)[header_name]
  end

  result.to_s
end
#html?(options = {}) ⇒ Boolean
Return true if the content type is HTML.
112 |
# File 'lib/murlsh/uri_ask.rb', line 112
# Return true if the content type is HTML.
#
# The content type from #content_type is matched against
# HtmlContentTypePattern. The actual return value is the matched portion of
# the content type (truthy) or nil, not a strict true/false.
#
# Options are handed to #content_type unchanged.
def html?(options = {})
  content_type(options)[HtmlContentTypePattern]
end
#title(options = {}) ⇒ Object
Get the HTML title.
Options:
-
:failproof - if true hide all exceptions and return url on failure
-
:headers - hash of headers to send in request
34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/murlsh/uri_ask.rb', line 34
# Get the HTML title.
#
# The result is computed once and cached in @title; later calls return the
# cached value regardless of the options passed. When no parsed document is
# available, or its title is nil or empty, the result is this object's
# string form (to_s).
#
# Options (handed to #doc unchanged):
# * :failproof - if true hide all exceptions and return url on failure
# * :headers - hash of headers to send in request
def title(options = {})
  return @title if defined?(@title)

  # Set the fallback first so the value is cached even when #doc yields nothing.
  @title = to_s

  d = doc(options)
  if d and d.title and not d.title.empty?
    @title = decode(d.title)
  end

  @title
end