Class: OpenStax::Cnx::V1::Page
- Inherits:
-
Object
- Object
- OpenStax::Cnx::V1::Page
- Defined in:
- lib/openstax/cnx/v1/page.rb
Constant Summary collapse
- ROOT_CSS =
Start parsing here
'html > body'
- LO_DEF_NODE_CSS =
Find nodes that define relevant tags
'.ost-learning-objective-def'
- STD_DEF_NODE_CSS =
'.ost-standards-def'
- TEKS_DEF_NODE_CSS =
'.ost-standards-teks'
- APBIO_DEF_NODE_CSS =
'.ost-standards-apbio'
- STD_NAME_NODE_CSS =
'.ost-standards-name'
- STD_DESC_NODE_CSS =
'.ost-standards-description'
- LO_REGEX =
Find specific tags and extract the relevant parts
/ost-tag-lo-([\w+-]+)/
- STD_REGEX =
/ost-tag-std-([\w+-]+)/
- TEKS_REGEX =
/ost-tag-(teks-[\w+-]+)/
Instance Attribute Summary collapse
-
#book ⇒ Object
readonly
Returns the value of attribute book.
-
#chapter_section ⇒ Object
Returns the value of attribute chapter_section.
-
#hash ⇒ Object
readonly
Returns the value of attribute hash.
Class Method Summary collapse
- .feature_node(node, feature_ids) ⇒ Object
Instance Method Summary collapse
- #aplos ⇒ Object
- #baked_book_location ⇒ Object
- #canonical_url ⇒ Object
- #content ⇒ Object
- #doc ⇒ Object
-
#elements(element_classes:) ⇒ Object
Pass the element classes you want to retrieve with the page.
- #full_hash ⇒ Object
- #id ⇒ Object
- #index? ⇒ Boolean
-
#initialize(hash: {}, id: nil, url: nil, title: nil, content: nil, book: nil) ⇒ Page
constructor
A new instance of Page.
- #is_intro? ⇒ Boolean
- #los ⇒ Object
- #parsed_title ⇒ Object
- #preface? ⇒ Boolean
- #remove_elements(xpath:) ⇒ Object
- #root ⇒ Object
- #short_id ⇒ Object
- #tags ⇒ Object
- #title ⇒ Object
- #url ⇒ Object
- #uuid ⇒ Object
- #version ⇒ Object
Constructor Details
#initialize(hash: {}, id: nil, url: nil, title: nil, content: nil, book: nil) ⇒ Page
Returns a new instance of Page.
29 30 31 32 33 34 35 36 |
# File 'lib/openstax/cnx/v1/page.rb', line 29
# Builds a Page from whichever pieces of data the caller already has.
# Anything left nil is resolved later by the matching reader (#id, #url,
# #content), with the exception of the title, which is parsed immediately.
#
# @param hash [Hash] raw page data; #id and #parsed_title read from it
# @param id page id, stored as-is
# @param url page URL, stored as-is
# @param title value handed straight to OpenStax::Cnx::V1::Baked.parse_title
# @param content page content, stored as-is
# @param book the book this page belongs to, exposed through #book
def initialize(hash: {}, id: nil, url: nil, title: nil, content: nil, book: nil)
  @hash, @id, @url = hash, id, url
  @parsed_title = OpenStax::Cnx::V1::Baked.parse_title(title)
  @content, @book = content, book
end
Instance Attribute Details
#book ⇒ Object (readonly)
Returns the value of attribute book.
39 40 41 |
# File 'lib/openstax/cnx/v1/page.rb', line 39
# Read-only accessor for the book passed to the constructor.
#
# @return the value stored in @book (nil when none was given)
def book
  @book
end
#chapter_section ⇒ Object
Returns the value of attribute chapter_section.
38 39 40 |
# File 'lib/openstax/cnx/v1/page.rb', line 38
# Reader half of the chapter_section attribute.
#
# @return the value stored in @chapter_section (nil until assigned)
def chapter_section
  @chapter_section
end
#hash ⇒ Object (readonly)
Returns the value of attribute hash.
39 40 41 |
# File 'lib/openstax/cnx/v1/page.rb', line 39
# Read-only accessor for the raw page data given to the constructor.
#
# NOTE(review): this shadows Object#hash, which Ruby uses when an object is a
# Hash key or Set member - confirm Page instances are never used that way.
#
# @return the value stored in @hash
def hash
  @hash
end
Class Method Details
.feature_node(node, feature_ids) ⇒ Object
21 22 23 24 25 26 27 |
# File 'lib/openstax/cnx/v1/page.rb', line 21
# Finds the first descendant of +node+ whose id is one of +feature_ids+.
#
# @param node object responding to #at_css (presumably a Nokogiri node)
# @param feature_ids a single id or a (possibly nested) list of ids; nil
#   entries are ignored
# @return whatever node.at_css returns for the combined "#id, #id" selector,
#   or nil when no usable id was given
def self.feature_node(node, feature_ids)
  # Drop nils: "##{nil}" would yield the bare selector "#", which is not valid CSS.
  ids = [feature_ids].flatten.compact
  return if ids.empty?

  selector = ids.map { |feature_id| "##{feature_id}" }.join(', ')
  node.at_css(selector)
end
Instance Method Details
#aplos ⇒ Object
114 115 116 |
# File 'lib/openstax/cnx/v1/page.rb', line 114
# Values of every tag on this page whose :type is :aplo, memoized in @aplos.
#
# @return [Array] the :value of each :aplo tag, in the order #tags returns them
def aplos
  @aplos ||= tags.select { |tag| tag[:type] == :aplo }.map { |tag| tag[:value] }
end
#baked_book_location ⇒ Object
55 56 57 |
# File 'lib/openstax/cnx/v1/page.rb', line 55
# The :book_location entry of the parsed title.
#
# @return whatever #parsed_title stores under :book_location
def baked_book_location
  parsed_title[:book_location]
end
#canonical_url ⇒ Object
94 95 96 |
# File 'lib/openstax/cnx/v1/page.rb', line 94
# URL built from "<uuid>@<version>", computed once and cached in
# @canonical_url.
#
# @return whatever url_for returns for the "uuid@version" identifier
def canonical_url
  return @canonical_url if @canonical_url

  @canonical_url = url_for("#{uuid}@#{version}")
end
#content ⇒ Object
98 99 100 |
# File 'lib/openstax/cnx/v1/page.rb', line 98
# Page content: the value given to the constructor, otherwise the 'content'
# entry of #full_hash (cached in @content after the first lookup).
#
# @return the page content
# @raise [RuntimeError] when #full_hash has no 'content' key
def content
  return @content if @content

  @content = full_hash.fetch('content') do |key|
    raise "Page id=#{@id} is missing #{key}"
  end
end
#doc ⇒ Object
102 103 104 |
# File 'lib/openstax/cnx/v1/page.rb', line 102
# The page content parsed with Nokogiri::HTML, built on first use and cached
# in @doc.
#
# @return the result of Nokogiri::HTML(content)
def doc
  return @doc if @doc

  @doc = Nokogiri::HTML(content)
end
#elements(element_classes:) ⇒ Object
Pass the element classes you want to retrieve with the page,
e.g.,
element_classes = [
OpenStax::Cnx::V1::Figure,
OpenStax::Cnx::V1::Paragraph,
OpenStax::Cnx::V1::KeyTerm,
]
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 |
# File 'lib/openstax/cnx/v1/page.rb', line 188
# Wraps every node of the page that matches one of the given element classes.
#
# Each element class must respond to .matcher (an XPath expression),
# .matches?(node) and .new(node:).
#
# @param element_classes [Array<Class>] classes to look for, e.g.
#   [OpenStax::Cnx::V1::Figure, OpenStax::Cnx::V1::Paragraph]
# @return [Array] one element instance per matched node, in the order the
#   combined XPath query returned the nodes; empty when no classes are given
def elements(element_classes:)
  # An empty list would produce an empty XPath expression, so answer directly.
  return [] if element_classes.empty?

  # This join is important to OR together all the xpaths in order to determine
  # the matched element's order inside the page. Xpath does this for us.
  match_all_elements = element_classes.map(&:matcher).join(' | ')

  # Match on all the elements. Create Element objects with the matching xpath node.
  content_dom.xpath(match_all_elements).each_with_object([]) do |xpath_element, found|
    element_class = element_classes.find { |elem_class| elem_class.matches?(xpath_element) }

    # A node selected by the XPath but claimed by no class is skipped
    # instead of failing with NoMethodError on nil.
    next if element_class.nil?

    found << element_class.new(node: xpath_element)
  end
end
#full_hash ⇒ Object
78 79 80 |
# File 'lib/openstax/cnx/v1/page.rb', line 78
# The data returned by OpenStax::Cnx::V1.fetch for this page's #url, fetched
# on first use and cached in @full_hash.
#
# @return the result of OpenStax::Cnx::V1.fetch(url)
def full_hash
  return @full_hash if @full_hash

  @full_hash = OpenStax::Cnx::V1.fetch(url)
end
#id ⇒ Object
41 42 43 |
# File 'lib/openstax/cnx/v1/page.rb', line 41
# Page id: the value given to the constructor, otherwise the 'id' entry of
# #hash (cached in @id after the first lookup).
#
# @return the page id
# @raise [RuntimeError] when #hash has no 'id' key
def id
  return @id if @id

  @id = hash.fetch('id') do |key|
    raise "Page is missing #{key}"
  end
end
#index? ⇒ Boolean
74 75 76 |
# File 'lib/openstax/cnx/v1/page.rb', line 74
# Whether the page contains a div whose class attribute includes
# "os-index-container".
#
# @return [Boolean] result of the boolean() XPath query run on content_dom
def index?
  index_query = 'boolean(//div[contains(@class,"os-index-container")])'
  content_dom.xpath(index_query)
end
#is_intro? ⇒ Boolean
63 64 65 66 67 68 |
# File 'lib/openstax/cnx/v1/page.rb', line 63
# Whether this page is a chapter introduction, decided by the title starting
# with "Introduction". The answer is cached in @is_intro.
#
# CNX plans to implement a better way to identify chapter intro pages;
# this is a hack to be used until that happens.
#
# @return [Boolean]
def is_intro?
  # nil means "not computed yet"; a cached false must be returned untouched,
  # which is why ||= is not used here.
  @is_intro = title.start_with?('Introduction') if @is_intro.nil?
  @is_intro
end
#los ⇒ Object
110 111 112 |
# File 'lib/openstax/cnx/v1/page.rb', line 110
# Values of every tag on this page whose :type is :lo, memoized in @los.
#
# @return [Array] the :value of each :lo tag, in the order #tags returns them
def los
  @los ||= tags.select { |tag| tag[:type] == :lo }.map { |tag| tag[:value] }
end
#parsed_title ⇒ Object
49 50 51 52 53 |
# File 'lib/openstax/cnx/v1/page.rb', line 49
# The title as parsed by OpenStax::Cnx::V1::Baked.parse_title. Uses the value
# computed in the constructor when it is truthy; otherwise parses the 'title'
# entry of #hash and caches the result in @parsed_title.
#
# @return the parsed title (indexed with :text and :book_location elsewhere
#   in this class)
# @raise [RuntimeError] when #hash has no 'title' key
def parsed_title
  return @parsed_title if @parsed_title

  @parsed_title = OpenStax::Cnx::V1::Baked.parse_title(
    hash.fetch('title') do |key|
      raise "#{self.class.name} id=#{@id} is missing #{key}"
    end
  )
end
#preface? ⇒ Boolean
70 71 72 |
# File 'lib/openstax/cnx/v1/page.rb', line 70
# Whether the page body holds a div with data-type="page" whose class
# attribute is exactly "preface".
#
# @return [Boolean] result of the boolean() XPath query run on content_dom
def preface?
  preface_query = 'boolean(//html/body/div[@data-type="page"][@class="preface"])'
  content_dom.xpath(preface_query)
end
#remove_elements(xpath:) ⇒ Object
172 173 174 175 176 |
# File 'lib/openstax/cnx/v1/page.rb', line 172
# Calls #remove on every node of content_dom that matches +xpath+.
#
# @param xpath [String] XPath expression selecting the nodes to remove
# @return the collection produced by content_dom.xpath(xpath)
def remove_elements(xpath:)
  content_dom.xpath(xpath).each(&:remove)
end
#root ⇒ Object
106 107 108 |
# File 'lib/openstax/cnx/v1/page.rb', line 106
# The node of #doc selected by ROOT_CSS, where tag parsing starts. Looked up
# on first use and cached in @root.
#
# @return the result of doc.at_css(ROOT_CSS)
def root
  return @root if @root

  @root = doc.at_css(ROOT_CSS)
end
#short_id ⇒ Object
86 87 88 |
# File 'lib/openstax/cnx/v1/page.rb', line 86
# The 'shortId' entry of #full_hash, cached in @short_id once a truthy value
# has been found.
#
# @return the short id, or nil when #full_hash has no 'shortId' key
def short_id
  return @short_id if @short_id

  @short_id = full_hash.fetch('shortId', nil)
end
#tags ⇒ Object
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
# File 'lib/openstax/cnx/v1/page.rb', line 118
# Collects the tags declared in this page's content. The result is built once
# and cached in @tags, keyed by tag value, so a value defined twice keeps only
# its last definition.
#
# Every page gets a :cnxmod tag ("context-cnxmod:<uuid>"). Nodes matching
# LO_DEF_NODE_CSS contribute :lo tags; nodes matching STD_DEF_NODE_CSS
# contribute :teks or :aplo tags, depending on which standards class they
# also carry.
#
# @return [Array<Hash>] tag hashes; each has :value and :type, plus
#   :description and :teks (for :lo) or :name and :description (for :teks
#   and :aplo)
def tags
  return @tags.values unless @tags.nil?

  # Start with default cnxmod tag
  cnxmod_value = "context-cnxmod:#{uuid}"
  @tags = { cnxmod_value => { value: cnxmod_value, type: :cnxmod } }

  # Extract tag name and description from .ost-standards-def and
  # .ost-learning-objective-def.

  # LO tags
  root.css(LO_DEF_NODE_CSS).each do |node|
    klass = node.attr('class')
    lo_value = LO_REGEX.match(klass).try(:[], 1)
    next if lo_value.nil?

    teks_value = TEKS_REGEX.match(klass).try(:[], 1)
    description = node.content.strip

    @tags[lo_value] = {
      value: lo_value,
      description: description,
      teks: teks_value,
      type: :lo
    }
  end

  # Other standards
  root.css(STD_DEF_NODE_CSS).each do |node|
    klass = node.attr('class')
    name = node.at_css(STD_NAME_NODE_CSS).try(:content).try(:strip)
    description = node.at_css(STD_DESC_NODE_CSS).try(:content).try(:strip)
    value = nil

    if node.matches?(TEKS_DEF_NODE_CSS)
      value = TEKS_REGEX.match(klass).try(:[], 1)
      type = :teks
    elsif node.matches?(APBIO_DEF_NODE_CSS)
      # AP Bio standards carry their identifier in an ost-tag-lo-* class.
      value = LO_REGEX.match(klass).try(:[], 1)
      type = :aplo
    end

    # Neither a TEKS nor an AP Bio definition, or no identifier in the class.
    next if value.nil?

    @tags[value] = {
      value: value,
      name: name,
      description: description,
      type: type
    }
  end

  @tags.values
end
#title ⇒ Object
59 60 61 |
# File 'lib/openstax/cnx/v1/page.rb', line 59
# The :text entry of the parsed title.
#
# @return whatever #parsed_title stores under :text
def title
  parsed_title[:text]
end
#url ⇒ Object
45 46 47 |
# File 'lib/openstax/cnx/v1/page.rb', line 45
# Page URL: the value given to the constructor, otherwise url_for(id),
# cached in @url after the first call.
#
# @return the page URL
def url
  return @url if @url

  @url = url_for(id)
end
#uuid ⇒ Object
82 83 84 |
# File 'lib/openstax/cnx/v1/page.rb', line 82
# The 'id' entry of #full_hash, cached in @uuid after the first lookup.
#
# @return the page uuid
# @raise [RuntimeError] when #full_hash has no 'id' key
def uuid
  # The message names the Page (it previously said "Book", which pointed at
  # the wrong kind of object when a page lookup failed).
  @uuid ||= full_hash.fetch('id') { |key| raise "Page id=#{@id} is missing #{key}" }
end
#version ⇒ Object
90 91 92 |
# File 'lib/openstax/cnx/v1/page.rb', line 90
# The 'version' entry of #full_hash, cached in @version after the first
# lookup.
#
# @return the page version
# @raise [RuntimeError] when #full_hash has no 'version' key
def version
  # The message names the Page (it previously said "Book", which pointed at
  # the wrong kind of object when a page lookup failed).
  @version ||= full_hash.fetch('version') { |key| raise "Page id=#{@id} is missing #{key}" }
end