Class: OpenStax::Cnx::V1::Page

Inherits:

Object

Object
OpenStax::Cnx::V1::Page

show all

Defined in:: lib/openstax/cnx/v1/page.rb

Constant Summary collapse

ROOT_CSS = Start parsing here

'html > body'

LO_DEF_NODE_CSS = Find nodes that define relevant tags

'.ost-learning-objective-def'

STD_DEF_NODE_CSS =

'.ost-standards-def'

TEKS_DEF_NODE_CSS =

'.ost-standards-teks'

APBIO_DEF_NODE_CSS =

'.ost-standards-apbio'

STD_NAME_NODE_CSS =

'.ost-standards-name'

STD_DESC_NODE_CSS =

'.ost-standards-description'

LO_REGEX = Find specific tags and extract the relevant parts

/ost-tag-lo-([\w+-]+)/

STD_REGEX =

/ost-tag-std-([\w+-]+)/

TEKS_REGEX =

/ost-tag-(teks-[\w+-]+)/

Instance Attribute Summary collapse

#book ⇒ Object readonly

Returns the value of attribute book.
#chapter_section ⇒ Object

Returns the value of attribute chapter_section.
#hash ⇒ Object readonly

Returns the value of attribute hash.

Class Method Summary collapse

.feature_node(node, feature_ids) ⇒ Object

Instance Method Summary collapse

Constructor Details

#initialize(hash: {}, id: nil, url: nil, title: nil, content: nil, book: nil) ⇒ `Page`

Returns a new instance of Page.

# File 'lib/openstax/cnx/v1/page.rb', line 29

# Builds a page from whatever the caller already has on hand; the readers on
# this class derive anything that was not supplied.
#
# @param hash [Hash] raw page entry; read by #id ('id') and #parsed_title ('title')
# @param id [Object, nil] page id; when nil, #id falls back to hash['id']
# @param url [Object, nil] page url; when nil, #url builds one from the id
# @param title [Object, nil] raw title, handed to Baked.parse_title right away
# @param content [Object, nil] page content; when nil, #content reads full_hash
# @param book [Object, nil] exposed unchanged through #book
def initialize(hash: {}, id: nil, url: nil, title: nil, content: nil, book: nil)
  # Values stored exactly as given.
  @hash = hash
  @id = id
  @url = url
  @content = content
  @book = book

  # NOTE(review): the title is parsed eagerly even when it is nil. #parsed_title
  # only falls back to hash['title'] if this result is falsy -- confirm that
  # Baked.parse_title(nil) returns nil.
  @parsed_title = OpenStax::Cnx::V1::Baked.parse_title(title)
end

Instance Attribute Details

#book ⇒ `Object` (readonly)

Returns the value of attribute book.



39
40
41

# File 'lib/openstax/cnx/v1/page.rb', line 39

# Returns the value passed to the constructor as +book:+ (nil by default).
def book
  @book
end

#chapter_section ⇒ `Object`

Returns the value of attribute chapter_section.



38
39
40

# File 'lib/openstax/cnx/v1/page.rb', line 38

# Returns the current value of @chapter_section.
# The constructor shown in this file does not assign it, so it stays nil
# until something else sets it.
def chapter_section
  @chapter_section
end

#hash ⇒ `Object` (readonly)

Returns the value of attribute hash.



39
40
41

# File 'lib/openstax/cnx/v1/page.rb', line 39

# Returns the raw page hash passed to the constructor as +hash:+ ({} by default).
# NOTE(review): this method shadows Object#hash, which Ruby uses when an object
# is a Hash key -- confirm Page instances are never used that way.
def hash
  @hash
end

Class Method Details

.feature_node(node, feature_ids) ⇒ `Object`

# File 'lib/openstax/cnx/v1/page.rb', line 21

# Finds the first node under +node+ whose id is one of +feature_ids+.
#
# @param node [#at_css] node to search within
# @param feature_ids [Object, Array] a single id, an array of ids, or nested
#   arrays of ids; nil and blank entries are ignored
# @return [Object, nil] whatever node.at_css returns for the combined id
#   selector, or nil when no usable id was given
def self.feature_node(node, feature_ids)
  # Drop nil and blank ids: interpolating them would yield a bare "#", which is
  # not a valid CSS selector and would make the whole lookup fail.
  ids = [feature_ids].flatten.compact.map(&:to_s).reject(&:empty?)
  return if ids.empty?

  # "#a, #b" matches any of the ids; at_css returns the first hit.
  feature_id_css = ids.map { |feature_id| "##{feature_id}" }.join(', ')
  node.at_css(feature_id_css)
end

Instance Method Details

#aplos ⇒ `Object`



114
115
116

# File 'lib/openstax/cnx/v1/page.rb', line 114

# Values of every tag whose type is :aplo, in the order #tags returns them.
# The resulting array is memoized.
def aplos
  @aplos ||= tags.each_with_object([]) do |tag, values|
    values << tag[:value] if tag[:type] == :aplo
  end
end

#baked_book_location ⇒ `Object`



55
56
57

# File 'lib/openstax/cnx/v1/page.rb', line 55

# Returns the :book_location entry of the parsed title (see #parsed_title).
def baked_book_location
  parsed_title[:book_location]
end

#canonical_url ⇒ `Object`



94
95
96

# File 'lib/openstax/cnx/v1/page.rb', line 94

# URL built from the page's "uuid@version" identifier; memoized.
def canonical_url
  return @canonical_url if @canonical_url

  versioned_id = "#{uuid}@#{version}"
  @canonical_url = url_for(versioned_id)
end

#content ⇒ `Object`



98
99
100

# File 'lib/openstax/cnx/v1/page.rb', line 98

# Page content: the value given to the constructor, otherwise the 'content'
# entry of #full_hash. Memoized.
#
# @raise [RuntimeError] when #full_hash has no 'content' key
def content
  return @content if @content

  @content = full_hash.fetch('content') do |missing_key|
    raise "Page id=#{@id} is missing #{missing_key}"
  end
end

#doc ⇒ `Object`



102
103
104

# File 'lib/openstax/cnx/v1/page.rb', line 102

# The page content parsed with Nokogiri::HTML; parsed once and memoized.
def doc
  return @doc if @doc

  @doc = Nokogiri::HTML(content)
end

#elements(element_classes:) ⇒ `Object`

Pass the element classes you want to retrieve from the page, e.g.,

  element_classes = [
    OpenStax::Cnx::V1::Figure,
    OpenStax::Cnx::V1::Paragraph,
    OpenStax::Cnx::V1::KeyTerm,
  ]

# File 'lib/openstax/cnx/v1/page.rb', line 188

# Wraps every node matched by the given element classes, in the order the
# xpath query returns them.
#
# Each element class must respond to +matcher+ (an xpath string), +matches?+
# (does this node belong to the class?) and +new(node:)+.
#
# @param element_classes [Enumerable] element classes to look for
# @return [Array] one element instance per matched node; nodes that no class
#   claims via matches? are skipped
def elements(element_classes:)
  # This join is important to OR together all the xpaths in order to determine
  # the matched element's order inside the page. Xpath does this for us.
  match_all_elements = element_classes.map(&:matcher).join(' | ')

  # No classes means an empty expression; there is nothing to query for.
  return [] if match_all_elements.empty?

  working_element_index = []

  # Match on all the elements. Create Element objects with the matching xpath node.
  # content_dom is defined outside this excerpt.
  content_dom.xpath(match_all_elements).each do |xpath_element|
    element_class = element_classes.detect do |elem_class|
      elem_class.matches?(xpath_element)
    end

    # A node can match the combined xpath without any class claiming it.
    # Skip it instead of calling .new on nil.
    next if element_class.nil?

    working_element_index << element_class.new(node: xpath_element)
  end

  working_element_index
end

#full_hash ⇒ `Object`



78
79
80

# File 'lib/openstax/cnx/v1/page.rb', line 78

# The result of OpenStax::Cnx::V1.fetch for this page's url; memoized.
def full_hash
  return @full_hash if @full_hash

  @full_hash = OpenStax::Cnx::V1.fetch(url)
end

#id ⇒ `Object`



41
42
43

# File 'lib/openstax/cnx/v1/page.rb', line 41

# Page id: the value given to the constructor, otherwise hash['id']. Memoized.
#
# @raise [RuntimeError] when the hash has no 'id' key
def id
  return @id if @id

  @id = hash.fetch('id') do |missing_key|
    raise "Page is missing #{missing_key}"
  end
end

#index? ⇒ `Boolean`

Returns:

(Boolean)



74
75
76

# File 'lib/openstax/cnx/v1/page.rb', line 74

# Evaluates an xpath boolean() that asks whether the page contains a div whose
# class attribute includes "os-index-container".
#
# @return [Object] the result of content_dom.xpath for that expression
def index?
  index_container_check = 'boolean(//div[contains(@class,"os-index-container")])'
  content_dom.xpath(index_container_check)
end

#is_intro? ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/openstax/cnx/v1/page.rb', line 63

# Whether this page is treated as a chapter introduction. The answer is
# computed once from the title and cached.
#
# @return [Boolean]
def is_intro?
  # nil means "not computed yet"; a cached false must be honoured as well.
  if @is_intro.nil?
    # CNX plans to implement a better way to identify chapter intro pages
    # This is a hack to be used until that happens
    @is_intro = title.start_with?('Introduction')
  end

  @is_intro
end

#los ⇒ `Object`



110
111
112

# File 'lib/openstax/cnx/v1/page.rb', line 110

# Values of every tag whose type is :lo, in the order #tags returns them.
# The resulting array is memoized.
def los
  @los ||= tags.each_with_object([]) do |tag, values|
    values << tag[:value] if tag[:type] == :lo
  end
end

#parsed_title ⇒ `Object`

# File 'lib/openstax/cnx/v1/page.rb', line 49

# The title after Baked.parse_title: the value parsed in the constructor when
# truthy, otherwise hash['title'] parsed on first use. Memoized.
#
# @raise [RuntimeError] when the hash has no 'title' key
def parsed_title
  return @parsed_title if @parsed_title

  raw_title = hash.fetch('title') do |missing_key|
    raise "#{self.class.name} id=#{@id} is missing #{missing_key}"
  end
  @parsed_title = OpenStax::Cnx::V1::Baked.parse_title(raw_title)
end

#preface? ⇒ `Boolean`

Returns:

(Boolean)



70
71
72

# File 'lib/openstax/cnx/v1/page.rb', line 70

# Evaluates an xpath boolean() that asks whether html/body holds a div with
# data-type="page" whose class attribute is exactly "preface".
#
# @return [Object] the result of content_dom.xpath for that expression
def preface?
  preface_check = 'boolean(//html/body/div[@data-type="page"][@class="preface"])'
  content_dom.xpath(preface_check)
end

#remove_elements(xpath:) ⇒ `Object`

# File 'lib/openstax/cnx/v1/page.rb', line 172

# Calls +remove+ on every node that +xpath+ matches in content_dom.
#
# @param xpath [String] expression selecting the nodes to remove
# @return [Object] whatever +each+ returns for the matched set
def remove_elements(xpath:)
  content_dom.xpath(xpath).each(&:remove)
end

#root ⇒ `Object`



106
107
108

# File 'lib/openstax/cnx/v1/page.rb', line 106

# The node tag parsing starts from: the first match of ROOT_CSS in #doc.
# Memoized.
def root
  return @root if @root

  @root = doc.at_css(ROOT_CSS)
end

#short_id ⇒ `Object`



86
87
88

# File 'lib/openstax/cnx/v1/page.rb', line 86

# The 'shortId' entry of #full_hash, or nil when that key is absent.
# Only a truthy value is cached; a nil result is looked up again on each call.
def short_id
  return @short_id if @short_id

  @short_id = full_hash.fetch('shortId', nil)
end

#tags ⇒ `Object`

# File 'lib/openstax/cnx/v1/page.rb', line 118

def tags
  return @tags.values unless @tags.nil?

  # Start with default cnxmod tag
  cnxmod_value = "context-cnxmod:#{uuid}"
  @tags = { cnxmod_value => { value: cnxmod_value, type: :cnxmod } }

  # Extract tag name and description from .ost-standards-def and .os-learning-objective-def.

  # LO tags
  root.css(LO_DEF_NODE_CSS).each do |node|
    klass = node.attr('class')
    lo_value = LO_REGEX.match(klass).try(:[], 1)
    next if lo_value.nil?

    teks_value = TEKS_REGEX.match(klass).try(:[], 1)
    description = node.content.strip

    @tags[lo_value] = {
      value: lo_value,
      description: description,
      teks: teks_value,
      type: :lo
    }
  end

  # Other standards
  root.css(STD_DEF_NODE_CSS).each do |node|
    klass = node.attr('class')
    name = node.at_css(STD_NAME_NODE_CSS).try(:content).try(:strip)
    description = node.at_css(STD_DESC_NODE_CSS).try(:content).try(:strip)
    value = nil

    if node.matches?(TEKS_DEF_NODE_CSS)
      value = TEKS_REGEX.match(klass).try(:[], 1)
      type = :teks
    elsif node.matches?(APBIO_DEF_NODE_CSS)
      value = LO_REGEX.match(klass).try(:[], 1)
      type = :aplo
    end

    next if value.nil?

    @tags[value] = {
      value: value,
      name: name,
      description: description,
      type: type
    }
  end

  @tags.values
end

#title ⇒ `Object`



59
60
61

# File 'lib/openstax/cnx/v1/page.rb', line 59

# Returns the :text entry of the parsed title (see #parsed_title).
def title
  parsed_title[:text]
end

#url ⇒ `Object`



45
46
47

# File 'lib/openstax/cnx/v1/page.rb', line 45

# Page url: the value given to the constructor, otherwise url_for(id). Memoized.
def url
  return @url if @url

  @url = url_for(id)
end

#uuid ⇒ `Object`



82
83
84

# File 'lib/openstax/cnx/v1/page.rb', line 82

# The 'id' entry of #full_hash; memoized.
#
# @raise [RuntimeError] when #full_hash has no 'id' key
def uuid
  # The message names the Page (it previously said "Book"), matching #content.
  @uuid ||= full_hash.fetch('id') { |key| raise "Page id=#{@id} is missing #{key}" }
end

#version ⇒ `Object`



90
91
92

# File 'lib/openstax/cnx/v1/page.rb', line 90

# The 'version' entry of #full_hash; memoized.
#
# @raise [RuntimeError] when #full_hash has no 'version' key
def version
  # The message names the Page (it previously said "Book"), matching #content.
  @version ||= full_hash.fetch('version') { |key| raise "Page id=#{@id} is missing #{key}" }
end

Class: OpenStax::Cnx::V1::Page

Constant Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(hash: {}, id: nil, url: nil, title: nil, content: nil, book: nil) ⇒ Page

Instance Attribute Details

#book ⇒ Object (readonly)

#chapter_section ⇒ Object

#hash ⇒ Object (readonly)

Class Method Details

.feature_node(node, feature_ids) ⇒ Object

Instance Method Details

#aplos ⇒ Object

#baked_book_location ⇒ Object

#canonical_url ⇒ Object

#content ⇒ Object

#doc ⇒ Object

#elements(element_classes:) ⇒ Object

#full_hash ⇒ Object

#id ⇒ Object

#index? ⇒ Boolean

#is_intro? ⇒ Boolean

#los ⇒ Object

#parsed_title ⇒ Object

#preface? ⇒ Boolean

#remove_elements(xpath:) ⇒ Object

#root ⇒ Object

#short_id ⇒ Object

#tags ⇒ Object

#title ⇒ Object

#url ⇒ Object

#uuid ⇒ Object

#version ⇒ Object

#initialize(hash: {}, id: nil, url: nil, title: nil, content: nil, book: nil) ⇒ `Page`

#book ⇒ `Object` (readonly)

#chapter_section ⇒ `Object`

#hash ⇒ `Object` (readonly)

.feature_node(node, feature_ids) ⇒ `Object`

#aplos ⇒ `Object`

#baked_book_location ⇒ `Object`

#canonical_url ⇒ `Object`

#content ⇒ `Object`

#doc ⇒ `Object`

#elements(element_classes:) ⇒ `Object`

#full_hash ⇒ `Object`

#id ⇒ `Object`

#index? ⇒ `Boolean`

#is_intro? ⇒ `Boolean`

#los ⇒ `Object`

#parsed_title ⇒ `Object`

#preface? ⇒ `Boolean`

#remove_elements(xpath:) ⇒ `Object`

#root ⇒ `Object`

#short_id ⇒ `Object`

#tags ⇒ `Object`

#title ⇒ `Object`

#url ⇒ `Object`

#uuid ⇒ `Object`

#version ⇒ `Object`