Class: OpenStax::Cnx::V1::Page

Inherits:
Object
  • Object
show all
Defined in:
lib/openstax/cnx/v1/page.rb

Constant Summary collapse

ROOT_CSS =

Start parsing here

'html > body'
LO_DEF_NODE_CSS =

Find nodes that define relevant tags

'.ost-learning-objective-def'
STD_DEF_NODE_CSS =
'.ost-standards-def'
TEKS_DEF_NODE_CSS =
'.ost-standards-teks'
APBIO_DEF_NODE_CSS =
'.ost-standards-apbio'
STD_NAME_NODE_CSS =
'.ost-standards-name'
STD_DESC_NODE_CSS =
'.ost-standards-description'
LO_REGEX =

Find specific tags and extract the relevant parts

/ost-tag-lo-([\w+-]+)/
STD_REGEX =
/ost-tag-std-([\w+-]+)/
TEKS_REGEX =
/ost-tag-(teks-[\w+-]+)/

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(hash: {}, id: nil, url: nil, title: nil, content: nil, book: nil) ⇒ Page

Returns a new instance of Page.



29
30
31
32
33
34
35
36
# File 'lib/openstax/cnx/v1/page.rb', line 29

# Builds a page from whichever pieces of data the caller already has.
# Every keyword is optional and is stored as given, except +title+, which is
# passed through OpenStax::Cnx::V1::Baked.parse_title and stored in its
# parsed form.
#
# @param hash [Hash] stored in @hash (defaults to an empty hash)
# @param id [Object, nil] stored in @id
# @param url [Object, nil] stored in @url
# @param title [Object, nil] parsed and stored in @parsed_title
# @param content [Object, nil] stored in @content
# @param book [Object, nil] stored in @book
def initialize(hash: {}, id: nil, url: nil, title: nil, content: nil, book: nil)
  @hash = hash
  @id = id
  @url = url
  @parsed_title = OpenStax::Cnx::V1::Baked.parse_title(title)
  @content = content
  @book = book
end

Instance Attribute Details

#bookObject (readonly)

Returns the value of attribute book.



39
40
41
# File 'lib/openstax/cnx/v1/page.rb', line 39

# Reader for @book, which the constructor sets from its +book:+ argument.
def book
  @book
end

#chapter_sectionObject

Returns the value of attribute chapter_section.



38
39
40
# File 'lib/openstax/cnx/v1/page.rb', line 38

# Reader for @chapter_section.
# NOTE(review): the constructor does not assign this variable; presumably it
# is set later through a writer on this attribute — confirm against callers.
def chapter_section
  @chapter_section
end

#hashObject (readonly)

Returns the value of attribute hash.



39
40
41
# File 'lib/openstax/cnx/v1/page.rb', line 39

# Reader for @hash, which the constructor sets from its +hash:+ argument
# (an empty hash by default).
# NOTE(review): this shadows Object#hash, so instances used as Hash keys or
# Set members would be hashed via this value — confirm that is intended.
def hash
  @hash
end

Class Method Details

.feature_node(node, feature_ids) ⇒ Object



21
22
23
24
25
26
27
# File 'lib/openstax/cnx/v1/page.rb', line 21

# Looks up the first node under +node+ whose id is one of +feature_ids+.
#
# @param node [#at_css] node to search within
# @param feature_ids [String, Array, nil] a single id or a (possibly nested)
#   list of ids; nil entries are ignored
# @return [Object, nil] whatever +node.at_css+ returns for the combined id
#   selector, or nil when no usable id was given
def self.feature_node(node, feature_ids)
  # Accept a scalar, a list or a nested list. Dropping nils matters: a nil id
  # would otherwise turn into the invalid selector "#".
  feature_ids = [feature_ids].flatten.compact
  return if feature_ids.empty?

  # A comma-separated selector list matches any one of the ids.
  feature_id_css = feature_ids.map { |feature_id| "##{feature_id}" }.join(', ')
  node.at_css(feature_id_css)
end

Instance Method Details

#aplosObject



114
115
116
# File 'lib/openstax/cnx/v1/page.rb', line 114

# Values of all tags whose type is :aplo, memoized in @aplos.
#
# @return [Array] the :value of each matching tag, in the order of #tags
def aplos
  return @aplos if @aplos

  aplo_tags = tags.select { |tag| tag[:type] == :aplo }
  @aplos = aplo_tags.map { |tag| tag[:value] }
end

#baked_book_locationObject



55
56
57
# File 'lib/openstax/cnx/v1/page.rb', line 55

# Returns the :book_location entry of the parsed title.
def baked_book_location
  parsed_title[:book_location]
end

#canonical_urlObject



94
95
96
# File 'lib/openstax/cnx/v1/page.rb', line 94

# URL built by url_for from "<uuid>@<version>", memoized in @canonical_url.
def canonical_url
  return @canonical_url if @canonical_url

  versioned_id = "#{uuid}@#{version}"
  @canonical_url = url_for(versioned_id)
end

#contentObject



98
99
100
# File 'lib/openstax/cnx/v1/page.rb', line 98

# Page content: the value already held in @content if there is one,
# otherwise the 'content' entry of full_hash (then memoized).
#
# @raise [RuntimeError] when full_hash has no 'content' key
def content
  return @content if @content

  @content = full_hash.fetch('content') do |missing_key|
    raise "Page id=#{@id} is missing #{missing_key}"
  end
end

#docObject



102
103
104
# File 'lib/openstax/cnx/v1/page.rb', line 102

# The page content parsed with Nokogiri::HTML, memoized in @doc.
def doc
  return @doc if @doc

  @doc = Nokogiri::HTML(content)
end

#elements(element_classes:) ⇒ Object

Pass the element classes you want to retrieve from the page, e.g.,

  element_classes = [
    OpenStax::Cnx::V1::Figure,
    OpenStax::Cnx::V1::Paragraph,
    OpenStax::Cnx::V1::KeyTerm,
  ]



188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/openstax/cnx/v1/page.rb', line 188

# Builds one element object for every node of the page that is matched by one
# of the given element classes, in the order the combined XPath query returns
# the nodes.
#
# @param element_classes [Array<Class>] classes responding to +.matcher+
#   (an XPath string), +.matches?(node)+ and +.new(node:)+
# @return [Array] instances of the given classes; empty when no classes are
#   given or nothing matches
def elements(element_classes:)
  # An empty list would produce an empty XPath expression.
  return [] if element_classes.empty?

  # This join is important to OR together all the xpaths in order to determine
  # the matched element's order inside the page. Xpath does this for us.
  match_all_elements = element_classes.map(&:matcher).join(' | ')

  # Match on all the elements. Create Element objects with the matching xpath node.
  content_dom.xpath(match_all_elements).each_with_object([]) do |xpath_element, found|
    element_class = element_classes.find { |elem_class| elem_class.matches?(xpath_element) }

    # A node that no class claims is skipped rather than calling .new on nil.
    next if element_class.nil?

    found << element_class.new(node: xpath_element)
  end
end

#full_hashObject



78
79
80
# File 'lib/openstax/cnx/v1/page.rb', line 78

# Result of OpenStax::Cnx::V1.fetch for this page's url, memoized in @full_hash.
def full_hash
  return @full_hash if @full_hash

  @full_hash = OpenStax::Cnx::V1.fetch(url)
end

#idObject



41
42
43
# File 'lib/openstax/cnx/v1/page.rb', line 41

# Page id: the value already held in @id if there is one, otherwise the 'id'
# entry of #hash (then memoized).
#
# @raise [RuntimeError] when #hash has no 'id' key
def id
  return @id if @id

  @id = hash.fetch('id') { |missing_key| raise "Page is missing #{missing_key}" }
end

#index?Boolean

Returns:

  • (Boolean)


74
75
76
# File 'lib/openstax/cnx/v1/page.rb', line 74

# Evaluates an XPath boolean() against content_dom that tests for a div whose
# class attribute contains "os-index-container", and returns the result.
def index?
  index_container_xpath = 'boolean(//div[contains(@class,"os-index-container")])'
  content_dom.xpath(index_container_xpath)
end

#is_intro?Boolean

Returns:

  • (Boolean)


63
64
65
66
67
68
# File 'lib/openstax/cnx/v1/page.rb', line 63

# Whether the page title starts with 'Introduction'. The answer (true or
# false) is memoized in @is_intro, so the title is only inspected while
# @is_intro is still nil.
#
# CNX plans to implement a better way to identify chapter intro pages;
# this is a hack to be used until that happens.
def is_intro?
  @is_intro = title.start_with?('Introduction') if @is_intro.nil?
  @is_intro
end

#losObject



110
111
112
# File 'lib/openstax/cnx/v1/page.rb', line 110

# Values of all tags whose type is :lo, memoized in @los.
#
# @return [Array] the :value of each matching tag, in the order of #tags
def los
  return @los if @los

  lo_tags = tags.select { |tag| tag[:type] == :lo }
  @los = lo_tags.map { |tag| tag[:value] }
end

#parsed_titleObject



49
50
51
52
53
# File 'lib/openstax/cnx/v1/page.rb', line 49

# Parsed title: the value already held in @parsed_title if there is one,
# otherwise the 'title' entry of #hash run through
# OpenStax::Cnx::V1::Baked.parse_title (then memoized).
#
# @raise [RuntimeError] when #hash has no 'title' key
def parsed_title
  return @parsed_title if @parsed_title

  raw_title = hash.fetch('title') do |missing_key|
    raise "#{self.class.name} id=#{@id} is missing #{missing_key}"
  end
  @parsed_title = OpenStax::Cnx::V1::Baked.parse_title(raw_title)
end

#preface?Boolean

Returns:

  • (Boolean)


70
71
72
# File 'lib/openstax/cnx/v1/page.rb', line 70

# Evaluates an XPath boolean() against content_dom that tests for
# /html/body/div with data-type="page" and class exactly "preface", and
# returns the result.
def preface?
  preface_xpath = 'boolean(//html/body/div[@data-type="page"][@class="preface"])'
  content_dom.xpath(preface_xpath)
end

#remove_elements(xpath:) ⇒ Object



172
173
174
175
176
# File 'lib/openstax/cnx/v1/page.rb', line 172

# Calls #remove on every node of content_dom matched by the given XPath.
#
# @param xpath [String] XPath expression selecting the nodes to remove
# @return [Object] the collection returned by content_dom.xpath
def remove_elements(xpath:)
  content_dom.xpath(xpath).each(&:remove)
end

#rootObject



106
107
108
# File 'lib/openstax/cnx/v1/page.rb', line 106

# First node of #doc matching ROOT_CSS, memoized in @root.
def root
  return @root if @root

  @root = doc.at_css(ROOT_CSS)
end

#short_idObject



86
87
88
# File 'lib/openstax/cnx/v1/page.rb', line 86

# The 'shortId' entry of full_hash, or nil when the key is absent.
# Memoized in @short_id.
def short_id
  @short_id ||= full_hash.fetch('shortId') { nil }
end

#tagsObject



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/openstax/cnx/v1/page.rb', line 118

# Returns every tag found on this page as an array of hashes. Tags are kept
# in @tags keyed by tag value, so a later definition with the same value
# replaces an earlier one.
#
# Tags are collected in this order:
# * the default cnxmod tag, "context-cnxmod:<uuid>"
# * a :lo tag for each LO_DEF_NODE_CSS node whose class matches LO_REGEX
#   (its :teks entry is the TEKS_REGEX capture from the same class, or nil)
# * a :teks or :aplo tag for each STD_DEF_NODE_CSS node that also matches
#   TEKS_DEF_NODE_CSS or APBIO_DEF_NODE_CSS respectively
#
# @return [Array<Hash>] tag hashes with :value and :type, plus :description,
#   :teks or :name depending on the tag type
def tags
  return @tags.values unless @tags.nil?

  # Collect into a local and memoize only after parsing has finished, so an
  # exception raised part-way through cannot leave a partial tag list cached.

  # Start with default cnxmod tag
  cnxmod_value = "context-cnxmod:#{uuid}"
  collected = { cnxmod_value => { value: cnxmod_value, type: :cnxmod } }

  # Extract tag name and description from .ost-standards-def and .ost-learning-objective-def.

  # LO tags
  root.css(LO_DEF_NODE_CSS).each do |node|
    klass = node.attr('class')
    lo_value = LO_REGEX.match(klass).try(:[], 1)
    # A definition node without an ost-tag-lo-* class carries no tag.
    next if lo_value.nil?

    teks_value = TEKS_REGEX.match(klass).try(:[], 1)
    description = node.content.strip

    collected[lo_value] = {
      value: lo_value,
      description: description,
      teks: teks_value,
      type: :lo
    }
  end

  # Other standards
  root.css(STD_DEF_NODE_CSS).each do |node|
    klass = node.attr('class')
    name = node.at_css(STD_NAME_NODE_CSS).try(:content).try(:strip)
    description = node.at_css(STD_DESC_NODE_CSS).try(:content).try(:strip)
    value = nil
    type = nil

    if node.matches?(TEKS_DEF_NODE_CSS)
      value = TEKS_REGEX.match(klass).try(:[], 1)
      type = :teks
    elsif node.matches?(APBIO_DEF_NODE_CSS)
      # AP Bio standards reuse the ost-tag-lo-* class format.
      value = LO_REGEX.match(klass).try(:[], 1)
      type = :aplo
    end

    # Unknown standard kinds and nodes without a tag class are ignored.
    next if value.nil?

    collected[value] = {
      value: value,
      name: name,
      description: description,
      type: type
    }
  end

  @tags = collected
  @tags.values
end

#titleObject



59
60
61
# File 'lib/openstax/cnx/v1/page.rb', line 59

# Returns the :text entry of the parsed title.
def title
  parsed_title[:text]
end

#urlObject



45
46
47
# File 'lib/openstax/cnx/v1/page.rb', line 45

# Page url: the value already held in @url if there is one, otherwise
# url_for(id) (then memoized).
def url
  return @url if @url

  @url = url_for(id)
end

#uuidObject



82
83
84
# File 'lib/openstax/cnx/v1/page.rb', line 82

# The 'id' entry of full_hash, memoized in @uuid.
#
# @raise [RuntimeError] when full_hash has no 'id' key
def uuid
  # The message names the Page (not a Book), matching the message used for a
  # missing 'content' key.
  @uuid ||= full_hash.fetch('id') { |key| raise "Page id=#{@id} is missing #{key}" }
end

#versionObject



90
91
92
# File 'lib/openstax/cnx/v1/page.rb', line 90

# The 'version' entry of full_hash, memoized in @version.
#
# @raise [RuntimeError] when full_hash has no 'version' key
def version
  # The message names the Page (not a Book), matching the message used for a
  # missing 'content' key.
  @version ||= full_hash.fetch('version') { |key| raise "Page id=#{@id} is missing #{key}" }
end