Class: Tychus::Parsers::Base
- Inherits:
-
Object
- Object
- Tychus::Parsers::Base
- Defined in:
- lib/tychus/parsers/base.rb
Direct Known Subclasses
Instance Attribute Summary collapse
-
#doc ⇒ Object
readonly
Returns the value of attribute doc.
-
#recipe ⇒ Object
readonly
Returns the value of attribute recipe.
-
#recipe_doc ⇒ Object
readonly
Returns the value of attribute recipe_doc.
-
#uri ⇒ Object
readonly
Returns the value of attribute uri.
Class Method Summary collapse
Instance Method Summary collapse
- #clean_instructions(obj) ⇒ Object
-
#initialize(uri) ⇒ Base
constructor
A new instance of Base.
- #parse ⇒ Object
- #parse_author ⇒ Object
- #parse_cook_time ⇒ Object
- #parse_description ⇒ Object
- #parse_duration(node) ⇒ Object
- #parse_image ⇒ Object
- #parse_ingredients ⇒ Object
- #parse_name ⇒ Object
- #parse_prep_time ⇒ Object
- #parse_recipe_instructions ⇒ Object
- #parse_recipe_yield ⇒ Object
- #parse_total_time ⇒ Object
- #recipe_attributes ⇒ Object
- #Value(obj) ⇒ Object
Constructor Details
#initialize(uri) ⇒ Base
Returns a new instance of Base.
31 32 33 34 35 36 |
# File 'lib/tychus/parsers/base.rb', line 31
#
# Builds a parser for the page at +uri+: stores the uri, creates an empty
# Recipe, downloads and parses the page, and narrows the document down to
# the recipe root node(s).
#
# @param uri [Object] location of the page to fetch (presumably a URL
#   string -- confirm against callers)
# @return [Base] a new instance of Base
def initialize(uri)
  @uri = uri
  @recipe = Recipe.new
  # URI.open (open-uri) replaces the bare Kernel#open call: since Ruby 3.0
  # Kernel#open no longer fetches URLs. The block form also closes the
  # handle once Nokogiri has read the document.
  @doc = URI.open(uri) { |io| Nokogiri::HTML(io) }
  # root_doc is supplied by the class (not shown in this listing) and is
  # passed to Nokogiri as a CSS selector.
  @recipe_doc = @doc.css(self.class.root_doc)
end
Instance Attribute Details
#doc ⇒ Object (readonly)
Returns the value of attribute doc.
7 8 9 |
# File 'lib/tychus/parsers/base.rb', line 7
#
# Reader for the +doc+ attribute, i.e. the full document that the
# constructor parsed with Nokogiri::HTML.
#
# @return [Object] the value of attribute doc
def doc
  @doc
end
#recipe ⇒ Object (readonly)
Returns the value of attribute recipe.
7 8 9 |
# File 'lib/tychus/parsers/base.rb', line 7
#
# Reader for the +recipe+ attribute, i.e. the Recipe object created by the
# constructor and filled in by #parse.
#
# @return [Object] the value of attribute recipe
def recipe
  @recipe
end
#recipe_doc ⇒ Object (readonly)
Returns the value of attribute recipe_doc.
7 8 9 |
# File 'lib/tychus/parsers/base.rb', line 7
#
# Reader for the +recipe_doc+ attribute, i.e. the result of selecting the
# class's root_doc CSS selector from the parsed document in the constructor.
#
# @return [Object] the value of attribute recipe_doc
def recipe_doc
  @recipe_doc
end
#uri ⇒ Object (readonly)
Returns the value of attribute uri.
7 8 9 |
# File 'lib/tychus/parsers/base.rb', line 7
#
# Reader for the +uri+ attribute, i.e. the value handed to the constructor.
#
# @return [Object] the value of attribute uri
def uri
  @uri
end
Class Method Details
.recipe_attributes ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/tychus/parsers/base.rb', line 9
#
# Names of the recipe fields this parser fills in. #parse calls
# "parse_<name>" for each entry and assigns the result through "<name>=".
#
# @return [Array<Symbol>] attribute names, in the order they are parsed
def self.recipe_attributes
  # TODO: clear up these attributes. Are they used? Real example to
  # verify?
  # recipeType
  # photo
  # published
  # summary
  # review - see schema.org/Review
  %i[
    name
    description
    prep_time
    cook_time
    total_time
    recipe_yield
    ingredients
    recipe_instructions
    image
  ]
end
Instance Method Details
#clean_instructions(obj) ⇒ Object
138 139 140 |
# File 'lib/tychus/parsers/base.rb', line 138
#
# Post-processing step applied to the instruction list built by
# #parse_recipe_instructions. This implementation hands its argument back
# untouched (presumably subclasses override it for site-specific cleanup --
# confirm against the subclasses).
#
# @param obj [Object] the parsed instructions
# @return [Object] +obj+, unchanged
def clean_instructions(obj)
  obj
end
#parse ⇒ Object
38 39 40 41 42 43 44 |
# File 'lib/tychus/parsers/base.rb', line 38
#
# Runs every "parse_<attribute>" method named by #recipe_attributes and
# stores each result on the recipe through the matching "<attribute>="
# writer. Results pass through #Value first, so a NullObject is stored as
# nil.
#
# @return [Object] the populated recipe
def parse
  recipe_attributes.each_with_object(recipe) do |field, target|
    parsed = Value(__send__("parse_#{field}"))
    target.__send__("#{field}=", parsed)
  end
end
#parse_author ⇒ Object
46 47 48 49 |
# File 'lib/tychus/parsers/base.rb', line 46
#
# Reads the text content of the node that itemprop_node_for returns for
# :author.
#
# NOTE(review): :author is not listed in .recipe_attributes, so #parse does
# not call this method.
#
# @return [Object] the content of the author node
def parse_author
  # is it always first?
  itemprop_node_for(:author).content
end
#parse_cook_time ⇒ Object
80 81 82 83 84 |
# File 'lib/tychus/parsers/base.rb', line 80
#
# Extracts the cook time by passing the :cookTime itemprop node to
# #parse_duration.
#
# @return [Object] whatever #parse_duration yields for the cookTime node
def parse_cook_time
  # is it always first?
  # leverage iso8601
  cook_time_node = itemprop_node_for(:cookTime)
  parse_duration(cook_time_node)
end
#parse_description ⇒ Object
51 52 53 54 |
# File 'lib/tychus/parsers/base.rb', line 51
#
# Reads the text content of the :description itemprop node.
#
# @return [Object] the content of the description node
def parse_description
  # is it always first?
  description_node = itemprop_node_for(:description)
  description_node.content
end
#parse_duration(node) ⇒ Object
109 110 111 112 113 114 115 116 117 118 119 120 121 |
# File 'lib/tychus/parsers/base.rb', line 109
#
# Pulls the raw duration value out of a duration node. Which attribute
# holds the value depends on the element used by the site:
#
#   Allrecipes  - 'time' element
#   Foodnetwork - 'meta' element (std according to Schema.org/Recipe)
#
# @param node [Object] a node responding to #name and #attr
# @return [Object] the 'content' attribute for meta/span nodes, the
#   'datetime' attribute for time nodes, or a new NullObject for any other
#   element name
def parse_duration(node)
  source_attribute = {
    "meta" => 'content',
    "span" => 'content',
    "time" => 'datetime'
  }[node.name]

  return NullObject.new unless source_attribute

  node.attr(source_attribute)
end
#parse_image ⇒ Object
86 87 88 89 |
# File 'lib/tychus/parsers/base.rb', line 86
#
# Reads the 'src' attribute of the :image itemprop node.
#
# @return [Object] the value of the image node's 'src' attribute
def parse_image
  # is it always first?
  image_node = itemprop_node_for(:image)
  image_node.attr('src')
end
#parse_ingredients ⇒ Object
91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/tychus/parsers/base.rb', line 91
#
# Collects one string per element marked itemprop="ingredients" inside the
# recipe document. Each string is the text of that element's child
# elements joined with single spaces; blank results are dropped.
#
# @return [Array<String>] the non-blank ingredient lines
def parse_ingredients
  # NOT FIRST
  ingredient_nodes = recipe_doc.css('[itemprop="ingredients"]')

  ingredient_lines = ingredient_nodes.map do |ingredient_node|
    child_texts = ingredient_node.element_children.map(&:content)
    child_texts.join(" ")
  end

  ingredient_lines.reject(&:blank?)
end
#parse_name ⇒ Object
75 76 77 78 |
# File 'lib/tychus/parsers/base.rb', line 75
#
# Reads the text content of the :name itemprop node.
#
# @return [Object] the content of the name node
def parse_name
  # is it always first?
  name_node = itemprop_node_for(:name)
  name_node.content
end
#parse_prep_time ⇒ Object
103 104 105 106 107 |
# File 'lib/tychus/parsers/base.rb', line 103
#
# Extracts the prep time by passing the :prepTime itemprop node to
# #parse_duration.
#
# @return [Object] whatever #parse_duration yields for the prepTime node
def parse_prep_time
  # is it always first?
  # leverage iso8601
  prep_time_node = itemprop_node_for(:prepTime)
  parse_duration(prep_time_node)
end
#parse_recipe_instructions ⇒ Object
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/tychus/parsers/base.rb', line 56
#
# Builds the list of instruction steps from the child elements of the
# :recipeInstructions itemprop node, then hands the list to
# #clean_instructions.
#
# Markup seen so far:
#   Allrecipes:  <li><span>lorem ipsum</span></li>
#   FoodNetwork: <p>lorem ipsum</p>
#
# @return [Object] the result of #clean_instructions for the non-blank steps
def parse_recipe_instructions
  # reject headers such as "Directions" and divs such as .categories for
  # Foodnetwork recipes
  reject_regex = /^(h.|div)$/

  step_nodes = itemprop_node_for(:recipeInstructions)
               .element_children
               .reject { |node| node.name =~ reject_regex }

  # Collapse runs of spaces, drop trailing whitespace, then break a node
  # into several steps wherever two "\r\n " sequences follow each other
  # (inside a double-quoted string "\s" is a literal space).
  steps = step_nodes.flat_map do |node|
    node.content.squeeze(" ").rstrip.split("\r\n\s\r\n\s")
  end

  clean_instructions(steps.reject(&:blank?))
end
#parse_recipe_yield ⇒ Object
123 124 125 126 |
# File 'lib/tychus/parsers/base.rb', line 123
#
# Reads the text content of the :recipeYield itemprop node.
#
# @return [Object] the content of the recipeYield node
def parse_recipe_yield
  # is it always first?
  yield_node = itemprop_node_for(:recipeYield)
  yield_node.content
end
#parse_total_time ⇒ Object
128 129 130 131 132 |
# File 'lib/tychus/parsers/base.rb', line 128
#
# Extracts the total time by passing the :totalTime itemprop node to
# #parse_duration.
#
# @return [Object] whatever #parse_duration yields for the totalTime node
def parse_total_time
  # is it always first?
  # leverage iso8601
  total_time_node = itemprop_node_for(:totalTime)
  parse_duration(total_time_node)
end
#recipe_attributes ⇒ Object
134 135 136 |
# File 'lib/tychus/parsers/base.rb', line 134
#
# Instance-level shortcut to the class-level attribute list, so that the
# list is looked up on the receiver's own class.
#
# @return [Object] the result of the class method .recipe_attributes
def recipe_attributes
  parser_class = self.class
  parser_class.recipe_attributes
end
#Value(obj) ⇒ Object
142 143 144 145 146 147 |
# File 'lib/tychus/parsers/base.rb', line 142
#
# Converts a parse result into the value stored on the recipe: a
# NullObject becomes nil, anything else is passed through as-is.
#
# @param obj [Object] a parse result
# @return [Object, nil] nil for a NullObject, otherwise +obj+ itself
def Value(obj)
  # Module#=== is the same membership check a `when NullObject` clause
  # performs.
  return nil if NullObject === obj

  obj
end