Class: ArticleJSON::Import::GoogleDoc::HTML::NodeAnalyzer

Inherits:
Object
  • Object
show all
Defined in:
lib/article_json/import/google_doc/html/node_analyzer.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(node) ⇒ NodeAnalyzer

Returns a new instance of NodeAnalyzer.

Parameters:

  • node (Nokogiri::HTML::Node)


9
10
11
# File 'lib/article_json/import/google_doc/html/node_analyzer.rb', line 9

# Wrap a node so that it can be analyzed.
#
# @param node [Nokogiri::HTML::Node] the node to analyze; it is stored
#   unchanged in `@node`
def initialize(node)
  @node = node
end

Instance Attribute Details

#node ⇒ Object (readonly)

Returns the value of attribute node.



6
7
8
# File 'lib/article_json/import/google_doc/html/node_analyzer.rb', line 6

# Reader for the node that was passed to the constructor.
#
# @return [Object] the value stored in `@node`
def node
  @node
end

Instance Method Details

#begins_with?(text) ⇒ Boolean

Check if the node text begins with a certain text

Parameters:

  • text (String)

Returns:

  • (Boolean)


23
24
25
26
# File 'lib/article_json/import/google_doc/html/node_analyzer.rb', line 23

# Check whether the first whitespace-separated word of the node text equals
# the given text. Both sides are stripped and downcased before they are
# compared, so case and surrounding whitespace are ignored.
#
# @param text [String] the word the node text is expected to start with
# @return [Boolean] false when the node text is blank
def begins_with?(text)
  words = node.inner_text.strip.downcase.split(' ')
  expected = text.strip.downcase
  words.first == expected
end

#br? ⇒ Boolean

Check if the node is a linebreak. A span containing only whitespace and
<br> tags is considered a linebreak.

Returns:

  • (Boolean)


104
105
106
107
# File 'lib/article_json/import/google_doc/html/node_analyzer.rb', line 104

# Check whether the node is a linebreak: either a `br` element itself or a
# node for which `only_includes_brs?` holds. The answer is computed once and
# cached in `@is_br`.
#
# @return [Boolean]
def br?
  unless defined?(@is_br)
    @is_br = node.name == 'br' || only_includes_brs?
  end
  @is_br
end

#embed? ⇒ Boolean

Check if the node contains an embedded element

Returns:

  • (Boolean)


96
97
98
99
# File 'lib/article_json/import/google_doc/html/node_analyzer.rb', line 96

# Check whether the node contains an embedded element, as decided by
# `EmbeddedParser.supported?`. The answer is computed once and cached in
# `@is_embed`.
#
# @return [Boolean]
def embed?
  unless defined?(@is_embed)
    @is_embed = EmbeddedParser.supported?(node)
  end
  @is_embed
end

#empty? ⇒ Boolean

Check if the node is empty, i.e. not containing any text. Given that images are the only nodes without text, we have to make sure that it’s not an image.

Returns:

  • (Boolean)


32
33
34
35
# File 'lib/article_json/import/google_doc/html/node_analyzer.rb', line 32

# Check whether the node is empty: its stripped text is blank and it is
# neither an image, a horizontal line nor a linebreak. The answer is
# computed once and cached in `@is_empty`.
#
# @return [Boolean]
def empty?
  unless defined?(@is_empty)
    blank = node.inner_text.strip.empty?
    # The other predicates are only consulted when there is no text.
    @is_empty = blank && !(image? || hr? || br?)
  end
  @is_empty
end

#has_text?(text) ⇒ Boolean

Check if a node equals a certain text

Parameters:

  • text (String)

Returns:

  • (Boolean)


16
17
18
# File 'lib/article_json/import/google_doc/html/node_analyzer.rb', line 16

# Check whether the whole node text equals the given text. Both sides are
# stripped and downcased before they are compared.
#
# @param text [String] the text to compare the node text with
# @return [Boolean]
def has_text?(text)
  actual = node.inner_text.strip.downcase
  actual == text.strip.downcase
end

#heading? ⇒ Boolean

Check if the node is a header tag between <h1> and <h5>

Returns:

  • (Boolean)


39
40
41
42
43
# File 'lib/article_json/import/google_doc/html/node_analyzer.rb', line 39

# Check whether the node is a heading: an `h1` to `h5` element that does not
# start a quote or a text box. The answer is computed once and cached in
# `@is_heading`.
#
# @return [Boolean]
def heading?
  unless defined?(@is_heading)
    # Quote and text box markers are ruled out before the tag name is checked.
    @is_heading =
      !(quote? || text_box?) && %w(h1 h2 h3 h4 h5).include?(node.name)
  end
  @is_heading
end

#hr? ⇒ Boolean

Check if the node is a horizontal line (i.e. `<hr>`)

Returns:

  • (Boolean)


47
48
49
# File 'lib/article_json/import/google_doc/html/node_analyzer.rb', line 47

# Check whether the node is a horizontal line, i.e. its tag name is `hr`.
#
# @return [Boolean]
def hr?
  tag_name = node.name
  tag_name == 'hr'
end

#image? ⇒ Boolean

Check if the node contains an image

Returns:

  • (Boolean)


89
90
91
92
# File 'lib/article_json/import/google_doc/html/node_analyzer.rb', line 89

# Check whether the node contains an image, i.e. the XPath query `.//img`
# run against it yields at least one result. The answer is computed once and
# cached in `@is_image`.
#
# @return [Boolean]
def image?
  unless defined?(@is_image)
    images = node.xpath('.//img')
    @is_image = !images.empty?
  end
  @is_image
end

#list? ⇒ Boolean

Check if the node contains an ordered or unordered list

Returns:

  • (Boolean)


66
67
68
69
# File 'lib/article_json/import/google_doc/html/node_analyzer.rb', line 66

# Check whether the node is an unordered (`ul`) or ordered (`ol`) list
# element. The answer is computed once and cached in `@is_list`.
#
# @return [Boolean]
def list?
  unless defined?(@is_list)
    @is_list =
      case node.name
      when 'ul', 'ol' then true
      else false
      end
  end
  @is_list
end

#paragraph? ⇒ Boolean

Check if the node is a normal text paragraph

Returns:

  • (Boolean)


53
54
55
56
57
58
59
60
61
62
# File 'lib/article_json/import/google_doc/html/node_analyzer.rb', line 53

# Check whether the node is a normal text paragraph: a `p` element that is
# not empty and is neither an image, a text box, a quote nor an embed. The
# answer is computed once and cached in `@is_paragraph`.
#
# @return [Boolean]
def paragraph?
  unless defined?(@is_paragraph)
    # The exclusions are only evaluated for `p` elements, in this order.
    @is_paragraph =
      node.name == 'p' &&
      !(empty? || image? || text_box? || quote? || embed?)
  end
  @is_paragraph
end

#quote? ⇒ Boolean

Check if the node starts a quote. Quotes start with a single line saying “Quote:”.

Returns:

  • (Boolean)


82
83
84
85
# File 'lib/article_json/import/google_doc/html/node_analyzer.rb', line 82

# Check whether the node starts a quote, i.e. `has_text?('quote:')` holds
# for it. The answer is computed once and cached in `@is_quote`.
#
# @return [Boolean]
def quote?
  defined?(@is_quote) ? @is_quote : (@is_quote = has_text?('quote:'))
end

#text_box? ⇒ Boolean

Check if the node starts a text box. Text boxes start with a single line saying “Textbox:” or “Highlight:”.

Returns:

  • (Boolean)


74
75
76
77
# File 'lib/article_json/import/google_doc/html/node_analyzer.rb', line 74

# Check whether the node starts a text box, i.e. `begins_with?` holds for
# either `textbox:` or `highlight:`. The answer is computed once and cached
# in `@is_text_box`.
#
# @return [Boolean]
def text_box?
  unless defined?(@is_text_box)
    # `textbox:` is tried first; `highlight:` only if that does not match.
    @is_text_box = %w[textbox: highlight:].any? { |marker| begins_with?(marker) }
  end
  @is_text_box
end

#type ⇒ Symbol

Determine the type of this node. The type is one of the elements supported by article_json.

Returns:

  • (Symbol)


112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/article_json/import/google_doc/html/node_analyzer.rb', line 112

# Determine the type of this node. The predicates are tried in a fixed
# order and the first one that holds decides the result.
#
# @return [Symbol] one of `:empty`, `:hr`, `:heading`, `:paragraph`,
#   `:list`, `:text_box`, `:quote`, `:image`, `:embed`, or `:unknown` when
#   no predicate holds
def type
  # Order matters: each kind maps to the predicate of the same name plus `?`.
  kinds = %i[empty hr heading paragraph list text_box quote image embed]
  kinds.find { |kind| send("#{kind}?") } || :unknown
end