Module: HMachine
- Included in:
- POSH::Base, Pattern::Abbr, Pattern::DateTime, Pattern::TypeValue, Pattern::URL, Pattern::ValueClass
- Defined in:
- lib/hmachine.rb,
lib/hmachine/pattern.rb,
lib/hmachine/posh/base.rb,
lib/hmachine/microformat.rb,
lib/hmachine/pattern/url.rb,
lib/hmachine/posh/anchor.rb,
lib/hmachine/pattern/abbr.rb,
lib/hmachine/microformat/adr.rb,
lib/hmachine/microformat/geo.rb,
lib/hmachine/microformat/xfn.rb,
lib/hmachine/microformat/xmdp.rb,
lib/hmachine/microformat/xoxo.rb,
lib/hmachine/pattern/datetime.rb,
lib/hmachine/microformat/hcard.rb,
lib/hmachine/pattern/typevalue.rb,
lib/hmachine/microformat/reltag.rb,
lib/hmachine/pattern/valueclass.rb,
lib/hmachine/posh/definition_list.rb,
lib/hmachine/microformat/votelinks.rb,
lib/hmachine/microformat/rellicense.rb
Defined Under Namespace
Modules: Microformat, POSH, Pattern
Constant Summary collapse
- VERSION =
"0.1.0"
- PRODID =
"-//markwunsch.com//hMachine #{VERSION}//EN"
Class Method Summary collapse
-
.find(document, format = nil) ⇒ Object
Convenience method for HMachine::Microformat.find method.
-
.get(html) ⇒ Object
Get a string of html or a url and convert it to a Nokogiri Document.
-
.get_document(html, url = nil) ⇒ Object
Convert HTML to a Nokogiri Document.
-
.get_url(url) ⇒ Object
Open a URL and convert the contents to a Nokogiri Document.
-
.map(key) ⇒ Object
Map a key to an element or design pattern.
- .normalize(name) ⇒ Object
Instance Method Summary collapse
-
#extract(pattern = nil, &block) ⇒ Object
Define the pattern used to extract contents from a node. Can be a symbol that maps to an Element parser, or a block.
-
#extract_from(node) ⇒ Object
Extract the content from the node.
-
#find_in(document) ⇒ Object
Search for the element in a document.
-
#found_in?(node) ⇒ Boolean
Is the element found in node?
-
#parse(document) ⇒ Object
Parse the document, finding every instance of the desired element, and extract their contents.
-
#parse_first(document) ⇒ Object
Parse the document, extracting the content for the first instance of the element.
-
#search(&block) ⇒ Object
Get/Set a function that defines how to find an element in a node.
-
#valid?(node) ⇒ Boolean
Is this a valid node?
-
#validate(&block) ⇒ Object
Get/Set a function that tests to make sure a given node is the element we want.
Class Method Details
.find(document, format = nil) ⇒ Object
Convenience method for HMachine::Microformat.find method
9 10 11 |
# File 'lib/hmachine.rb', line 9
# Shortcut that forwards both arguments unchanged to
# HMachine::Microformat.find and hands back its result.
#
# document - the document to look in (passed through as-is).
# format   - optional second argument for Microformat.find (default: nil).
def self.find(document, format = nil)
  HMachine::Microformat.find(document, format)
end
.get(html) ⇒ Object
Get a string of html or a url and convert it to a Nokogiri Document
14 15 16 17 18 19 20 21 22 23 |
# File 'lib/hmachine.rb', line 14
# Accept markup, a URL, or an already-parsed node and produce a document.
#
# html - a Nokogiri::XML::Node (returned untouched), or a String that is
#        either an HTTP URL or raw markup.
#
# A String that parses as a URI::HTTP is fetched through get_url using its
# normalized form; any other String is handed to get_document. A String that
# URI.parse rejects with URI::InvalidURIError is also handed to get_document.
def self.get(html)
  return html if html.is_a?(Nokogiri::XML::Node)

  location = URI.parse(html)
  if location.is_a?(URI::HTTP)
    get_url(location.normalize.to_s)
  else
    get_document(html)
  end
rescue URI::InvalidURIError
  get_document(html)
end
.get_document(html, url = nil) ⇒ Object
Convert HTML to a Nokogiri Document
36 37 38 |
# File 'lib/hmachine.rb', line 36
# Ensure the input is a parsed Nokogiri node.
#
# html - a Nokogiri::XML::Node (returned as-is) or markup to parse.
# url  - optional second argument forwarded to Nokogiri::HTML.parse
#        (default: nil).
def self.get_document(html, url = nil)
  return html if html.is_a?(Nokogiri::XML::Node)

  Nokogiri::HTML.parse(html, url)
end
.get_url(url) ⇒ Object
Open a URL and convert the contents to a Nokogiri Document
26 27 28 29 30 31 32 33 |
# File 'lib/hmachine.rb', line 26
# Fetch the resource at +url+ and turn its body into a document.
#
# url - String URL; parsed with URI.parse and opened via the URI's #open
#       (assumes open-uri has been loaded elsewhere in the file).
#
# Returns the result of get_document(body, url).
def self.get_url(url)
  # Read the whole body in one call instead of appending line by line with
  # +=, which re-allocates the growing string on every iteration.
  body = URI.parse(url).open { |web| web.read }
  get_document(body, url)
end
.map(key) ⇒ Object
Map a key to an element or design pattern
45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/hmachine.rb', line 45
# Resolve a parser key to the object that handles it.
#
# key - anything normalize accepts; matching is done on the normalized
#       symbol, but the ORIGINAL key is what gets forwarded.
#
# Pattern keys are delegated to HMachine::Pattern.map, microformat keys to
# HMachine::Microformat.map, and :base yields HMachine::POSH::Base.
# Raises RuntimeError for any other key.
def self.map(key)
  parser = normalize(key)
  pattern_keys = [:value_class, :valueclass, :abbr, :uri, :url, :typevalue]
  microformat_keys = [:hcard, :geo, :rellicense, :reltag, :votelinks, :xfn, :xmdp, :xoxo, :adr]

  return HMachine::Pattern.map(key) if pattern_keys.include?(parser)
  return HMachine::Microformat.map(key) if microformat_keys.include?(parser)
  return HMachine::POSH::Base if parser == :base

  raise "#{key} is not a recognized parser."
end
.normalize(name) ⇒ Object
40 41 42 |
# File 'lib/hmachine.rb', line 40
# Canonicalize a parser name: stringify it, trim surrounding whitespace,
# lowercase it, and return the result as a Symbol.
def self.normalize(name)
  cleaned = name.to_s.strip.downcase
  cleaned.to_sym
end
Instance Method Details
#extract(pattern = nil, &block) ⇒ Object
Define the pattern used to extract contents from a node. Can be a symbol that maps to an Element parser, or a block.
91 92 93 94 95 96 97 98 |
# File 'lib/hmachine.rb', line 91
# Get or set the extraction function.
#
# pattern - optional key; when given (and no block is), the extractor is
#           taken from HMachine.map(pattern).extract.
# block   - optional block; when given it becomes the extractor and
#           +pattern+ is ignored.
#
# Returns the stored extractor, or, if none has been set, a lambda that
# returns node.content.strip.
def extract(pattern = nil, &block)
  if block
    @extract = block
  elsif pattern
    @extract = HMachine.map(pattern).extract
  end

  @extract || lambda { |node| node.content.strip }
end
#extract_from(node) ⇒ Object
Extract the content from the node
101 102 103 |
# File 'lib/hmachine.rb', line 101
# Run the current extractor (see #extract) against +node+ and return
# whatever it produces.
def extract_from(node)
  extractor = extract
  extractor.call(node)
end
#find_in(document) ⇒ Object
Search for the element in a document
67 68 69 |
# File 'lib/hmachine.rb', line 67
# Run the current search function (see #search) against +document+ and
# return its result.
def find_in(document)
  finder = search
  finder.call(document)
end
#found_in?(node) ⇒ Boolean
Is the element found in node?
72 73 74 |
# File 'lib/hmachine.rb', line 72
# Report whether the element can be located in +node+.
#
# True when the search result is the node itself (eql?) or is a non-empty
# result. The search is executed exactly once, and a nil search result is
# treated as "not found" instead of raising NoMethodError on nil.empty?.
def found_in?(node)
  found = find_in(node)
  return true if found.eql?(node)

  !found.nil? && !found.empty?
end
#parse(document) ⇒ Object
Parse the document, finding every instance of the desired element, and extract their contents
106 107 108 109 110 111 112 113 114 115 116 |
# File 'lib/hmachine.rb', line 106
# Find every instance of the element in +document+ and extract each one.
#
# Returns nil when found_in?(document) is false. When the search result
# responds to #collect, every element is passed through extract_from;
# otherwise extract_from is applied to +document+ itself. A result that is a
# one-element collection is unwrapped to that single element.
def parse(document)
  return unless found_in?(document)

  found = find_in(document)
  contents =
    if found.respond_to?(:collect)
      found.collect { |element| extract_from(element) }
    else
      extract_from(document)
    end

  # Only unwrap things that actually support #first: a one-character String
  # also has length 1 but no #first, and used to raise NoMethodError here.
  unwrappable = contents.respond_to?(:first) && contents.respond_to?(:length)
  unwrappable && contents.length == 1 ? contents.first : contents
end
#parse_first(document) ⇒ Object
Parse the document, extracting the content for the first instance of the element
119 120 121 122 123 124 |
# File 'lib/hmachine.rb', line 119
# Extract the content of only the first instance of the element.
#
# Returns nil when found_in?(document) is false. Otherwise the search result
# is narrowed to its #first member when it responds to #first, or used whole
# when it does not, and that target is passed to extract_from.
def parse_first(document)
  return unless found_in?(document)

  matches = find_in(document)
  target = matches.respond_to?(:first) ? matches.first : matches
  extract_from(target)
end
#search(&block) ⇒ Object
Get/Set a function that defines how to find an element in a node. The search function should return a Nokogiri::XML::NodeSet, e.g. <tt>search {|node| node.css(element) }</tt>
61 62 63 64 |
# File 'lib/hmachine.rb', line 61
# Get or set the search function.
#
# block - optional block; when given it is stored as the search function.
#
# Returns the stored search function, or, if none has been set, an identity
# lambda that returns the node it is given.
def search(&block)
  @search = block unless block.nil?
  return @search if @search

  lambda { |node| node }
end
#valid?(node) ⇒ Boolean
Is this a valid node?
85 86 87 |
# File 'lib/hmachine.rb', line 85
# Run the current validation function (see #validate) against +node+ and
# return its result.
def valid?(node)
  checker = validate
  checker.call(node)
end
#validate(&block) ⇒ Object
Get/Set a function that tests to make sure a given node is the element we want. Should return truthy. Default just tests to see if the node passed is a child of its parent node.
79 80 81 82 |
# File 'lib/hmachine.rb', line 79
# Get or set the validation function.
#
# block - optional block; when given it is stored as the validator.
#
# Returns the stored validator, or, if none has been set, a lambda that runs
# find_in on the node's parent and checks that the result's children include
# the node.
def validate(&block)
  @validate = block unless block.nil?
  return @validate if @validate

  lambda { |node| find_in(node.parent).children.include?(node) }
end