Module: HMachine

Included in:
POSH::Base, Pattern::Abbr, Pattern::DateTime, Pattern::TypeValue, Pattern::URL, Pattern::ValueClass
Defined in:
lib/hmachine.rb,
lib/hmachine/pattern.rb,
lib/hmachine/posh/base.rb,
lib/hmachine/microformat.rb,
lib/hmachine/pattern/url.rb,
lib/hmachine/posh/anchor.rb,
lib/hmachine/pattern/abbr.rb,
lib/hmachine/microformat/adr.rb,
lib/hmachine/microformat/geo.rb,
lib/hmachine/microformat/xfn.rb,
lib/hmachine/microformat/xmdp.rb,
lib/hmachine/microformat/xoxo.rb,
lib/hmachine/pattern/datetime.rb,
lib/hmachine/microformat/hcard.rb,
lib/hmachine/pattern/typevalue.rb,
lib/hmachine/microformat/reltag.rb,
lib/hmachine/pattern/valueclass.rb,
lib/hmachine/posh/definition_list.rb,
lib/hmachine/microformat/votelinks.rb,
lib/hmachine/microformat/rellicense.rb

Defined Under Namespace

Modules: Microformat, POSH, Pattern

Constant Summary collapse

VERSION =
"0.1.0"
PRODID =
"-//markwunsch.com//hMachine #{VERSION}//EN"

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.find(document, format = nil) ⇒ Object

Convenience wrapper around HMachine::Microformat.find



9
10
11
# File 'lib/hmachine.rb', line 9

# Convenience entry point that forwards to HMachine::Microformat.find.
#
# document - passed through unchanged as the first argument.
# format   - passed through unchanged as the second argument (nil when omitted).
#
# Returns whatever HMachine::Microformat.find returns.
def self.find(document, format = nil)
  finder = HMachine::Microformat
  finder.find(document, format)
end

.get(html) ⇒ Object

Get a string of html or a url and convert it to a Nokogiri Document



14
15
16
17
18
19
20
21
22
23
# File 'lib/hmachine.rb', line 14

# Turn +html+ into a document.
#
# html - a Nokogiri::XML::Node (returned as-is), or a value handed to
#        URI.parse to decide whether it is an HTTP(S) URL or markup.
#
# When the parsed URI is a URI::HTTP, the normalized URL string is passed to
# get_url; any other parse result goes to get_document. If URI.parse (or
# anything called afterwards) raises URI::InvalidURIError, the input is
# treated as markup and passed to get_document.
def self.get(html)
  return html if html.is_a?(Nokogiri::XML::Node)

  location = URI.parse(html)
  if location.is_a?(URI::HTTP)
    get_url(location.normalize.to_s)
  else
    get_document(html)
  end
rescue URI::InvalidURIError
  get_document(html)
end

.get_document(html, url = nil) ⇒ Object

Convert HTML to a Nokogiri Document



36
37
38
# File 'lib/hmachine.rb', line 36

# Convert +html+ into a Nokogiri document.
#
# html - a Nokogiri::XML::Node (returned unchanged) or input for
#        Nokogiri::HTML.parse.
# url  - forwarded as the second argument to Nokogiri::HTML.parse
#        (nil when omitted).
def self.get_document(html, url = nil)
  return html if html.is_a?(Nokogiri::XML::Node)

  Nokogiri::HTML.parse(html, url)
end

.get_url(url) ⇒ Object

Open a URL and convert the contents to a Nokogiri Document



26
27
28
29
30
31
32
33
# File 'lib/hmachine.rb', line 26

# Open a URL and convert its contents to a Nokogiri document.
#
# url - String URL; it is parsed with URI.parse and also forwarded to
#       get_document as the document's URL.
#
# The response body is read in a single call rather than concatenated line by
# line, which avoids allocating a new String for every line of the response.
# NOTE(review): URI#open comes from open-uri, which is presumably required
# elsewhere in this file - confirm.
#
# Returns the result of get_document(body, url).
def self.get_url(url)
  body = URI.parse(url).open { |web| web.read }
  get_document(body, url)
end

.map(key) ⇒ Object

Map a key to an element or design pattern



45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/hmachine.rb', line 45

# Map a key to an element parser or design pattern.
#
# key - anything accepted by normalize; the normalized symbol selects the
#       branch, but the ORIGINAL key is what gets forwarded.
#
# Pattern keys are forwarded to HMachine::Pattern.map, microformat keys to
# HMachine::Microformat.map, and :base resolves to HMachine::POSH::Base.
#
# Raises RuntimeError when the key matches none of the known parsers.
def self.map(key)
  parser = normalize(key)
  pattern_keys = [:value_class, :valueclass, :abbr, :uri, :url, :typevalue]
  microformat_keys = [:hcard, :geo, :rellicense, :reltag, :votelinks, :xfn, :xmdp, :xoxo, :adr]

  if pattern_keys.include?(parser)
    HMachine::Pattern.map(key)
  elsif microformat_keys.include?(parser)
    HMachine::Microformat.map(key)
  elsif parser == :base
    HMachine::POSH::Base
  else
    raise "#{key} is not a recognized parser."
  end
end

.normalize(name) ⇒ Object



40
41
42
# File 'lib/hmachine.rb', line 40

# Canonicalize a parser name: stringify it, strip surrounding whitespace,
# lowercase it and return the result as a Symbol.
def self.normalize(name)
  cleaned = name.to_s.strip
  cleaned.downcase.to_sym
end

Instance Method Details

#extract(pattern = nil, &block) ⇒ Object

Define the pattern used to extract contents from a node. Can be a symbol that maps to an Element parser, or a block.



91
92
93
94
95
96
97
98
# File 'lib/hmachine.rb', line 91

# Get/Set the function used to extract contents from a node.
#
# pattern - optional key; when given (and no block is passed) the extractor is
#           taken from HMachine.map(pattern).extract.
# block   - optional block; takes precedence over +pattern+ and is stored as
#           the extractor.
#
# Returns the stored extractor, or a default lambda that returns the node's
# stripped content when none has been set.
def extract(pattern = nil, &block)
  if block
    @extract = block
  elsif pattern
    @extract = HMachine.map(pattern).extract
  end

  return @extract if @extract

  lambda { |node| node.content.strip }
end

#extract_from(node) ⇒ Object

Extract the content from the node



101
102
103
# File 'lib/hmachine.rb', line 101

# Run the current extractor (see #extract) against +node+ and return its result.
def extract_from(node)
  extractor = extract
  extractor.call(node)
end

#find_in(document) ⇒ Object

Search for the element in a document



67
68
69
# File 'lib/hmachine.rb', line 67

# Run the current search function (see #search) against +document+ and
# return whatever it yields.
def find_in(document)
  finder = search
  finder.call(document)
end

#found_in?(node) ⇒ Boolean

Is the element found in node?

Returns:

  • (Boolean)


72
73
74
# File 'lib/hmachine.rb', line 72

# Is the element found in +node+?
#
# The search is evaluated once and its result reused, so a costly search
# function is not run twice for the same node.
#
# Returns true when the search result is the node itself (eql?) or when the
# result is not empty; false otherwise.
def found_in?(node)
  found = find_in(node)
  found.eql?(node) || !found.empty?
end

#parse(document) ⇒ Object

Parse the document, finding every instance of the desired element, and extract their contents



106
107
108
109
110
111
112
113
114
115
116
# File 'lib/hmachine.rb', line 106

# Parse the document, finding every instance of the desired element and
# extracting their contents.
#
# document - the node/document to search.
#
# When the search result responds to #collect, each found element is run
# through extract_from; otherwise the document itself is extracted.
# A result holding exactly one item is unwrapped to that item. The unwrap is
# only attempted when the result responds to #first, so a one-character
# String coming out of the extractor is returned as-is instead of raising
# NoMethodError.
#
# Returns the extracted contents, or nil when the element is not found.
def parse(document)
  return unless found_in?(document)

  # Search once and reuse the result for both the type check and the mapping.
  found = find_in(document)
  contents =
    if found.respond_to?(:collect)
      found.collect { |element| extract_from(element) }
    else
      extract_from(document)
    end

  single = contents.respond_to?(:length) && contents.respond_to?(:first) && contents.length == 1
  single ? contents.first : contents
end

#parse_first(document) ⇒ Object

Parse the document, extracting the content for the first instance of the element



119
120
121
122
123
124
# File 'lib/hmachine.rb', line 119

# Parse the document, extracting the content of the first instance of the
# element only.
#
# When the search result responds to #first, its first item is extracted;
# otherwise the search result itself is extracted.
#
# Returns the extracted content, or nil when the element is not found.
def parse_first(document)
  return unless found_in?(document)

  elements = find_in(document)
  target = elements.respond_to?(:first) ? elements.first : elements
  extract_from(target)
end

#search(&block) ⇒ Object

Get/Set a function that defines how to find an element in a node. The search function should return a Nokogiri::XML::NodeSet, e.g. <tt>search {|node| node.css(element) }</tt>



61
62
63
64
# File 'lib/hmachine.rb', line 61

# Get/Set the function that locates the element inside a node.
#
# block - optional; when given it is stored as the search function.
#
# Returns the stored search function, or a default lambda that returns the
# node it is given when none has been set.
def search(&block)
  @search = block if block
  return @search if @search

  lambda { |node| node }
end

#valid?(node) ⇒ Boolean

Is this a valid node?

Returns:

  • (Boolean)


85
86
87
# File 'lib/hmachine.rb', line 85

# Run the current validation function (see #validate) against +node+ and
# return its result.
def valid?(node)
  checker = validate
  checker.call(node)
end

#validate(&block) ⇒ Object

Get/Set a function that tests to make sure a given node is the element we want. Should return truthy. Default just tests to see if the node passed is a child of its parent node.



79
80
81
82
# File 'lib/hmachine.rb', line 79

# Get/Set the function that tests whether a given node is the element we want.
#
# block - optional; when given it is stored as the validation function.
#
# Returns the stored validation function. When none has been set, returns a
# default lambda that runs find_in on the node's parent and checks that the
# node is included in the children of that result.
def validate(&block)
  @validate = block if block
  return @validate if @validate

  lambda { |node| find_in(node.parent).children.include?(node) }
end