Class: Nsf::Document

Inherits:
Object show all
Defined in:
lib/nsf.rb,
lib/nsf/formats/nsf.rb,
lib/nsf/formats/pdf.rb,
lib/nsf/formats/rtf.rb,
lib/nsf/formats/html.rb,
lib/nsf/formats/text.rb

Constant Summary collapse

PDF_DEFAULT_FONT_SIZE =
10.5
PDF_LEADING =
0.4
CONFORMING_TEXT_TAGS =

These tags are recursively replaced by their contents, and the resulting content is appended to the current paragraph.

%w(a abbr b bdi bdo cite code command datalist del dfn em i img ins kbd label mark math meter noscript output q ruby s samp small span strong sub sup textarea time var wbr)
NONCONFORMING_TEXT_TAGS =
%w(acronym big center dir font listing plaintext spacer strike tt u xmp)
TEXT_TAGS =
CONFORMING_TEXT_TAGS + NONCONFORMING_TEXT_TAGS
HEADING_TAGS =
%w(h1 h2 h3 h4 h5 h6)
BLOCK_PASSTHROUGH_TAGS =
%w(div dl form ol table tbody thead tfoot tr ul)
BLOCK_INITIATING_TAGS =
%w(article aside body blockquote dd dt header li nav p pre section td th ul)
BLOCK_PLAIN_TEXT_TAGS =
%w(pre plaintext listing xmp)
# Maps groups of HTML tag names to the NSF marker wrapped around their text.
# Every key must be an array of tag names (%w): from_html flattens the keys
# and tests membership tag by tag, so a plain string key would never match.
ENHANCERS =
{ %w(b strong) => "*", %w(i em) => "_" }

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(nodes) ⇒ Document

Returns a new instance of Document.



7
8
9
# File 'lib/nsf.rb', line 7

# Creates a document around the given nodes.
#
# @param nodes [Object] stored as-is in @nodes; presumably an Array of
#   Heading/Paragraph/Fixedblock objects, as built by the from_* class
#   methods -- TODO confirm
def initialize(nodes)
  @nodes = nodes
end

Instance Attribute Details

#nodesObject

Returns the value of attribute nodes.



5
6
7
# File 'lib/nsf.rb', line 5

# Returns the nodes this document was constructed with.
#
# @return [Object] the value stored in @nodes by the constructor
def nodes
  @nodes
end

Class Method Details

.from(text, format) ⇒ Object



26
27
28
# File 'lib/nsf.rb', line 26

# Builds a document from +text+ by dispatching to the class method named
# "from_<format>" (for example from_html, from_text, from_rtf, from_nsf).
#
# @param text [String] the source text handed to the selected parser
# @param format [String, Symbol] suffix of the from_* method to call
# @return [Object] whatever the selected from_* method returns
# @raise [NoMethodError] if no public from_<format> method exists
def self.from(text, format)
  # public_send rather than send: the method name is built from a
  # caller-supplied value, so it must never reach a private method.
  public_send("from_#{format}", text)
end

.from_blocks(blocks) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
# File 'lib/nsf.rb', line 30

# Builds a document from NSF block strings, choosing the node class for each
# block from its prefix:
#
# * a line starting with one or more "#" followed by a space -> Heading
# * a line starting with four spaces                          -> Fixedblock
# * anything else                                             -> Paragraph
#
# @param blocks [Array<String>] one string per block
# @return [Document] a new document holding one node per block
def self.from_blocks(blocks)
  parsed_nodes = blocks.map do |block|
    node_class =
      case block
      when /^#+ / then Heading
      when /^    / then Fixedblock
      else Paragraph
      end

    node_class.from_nsf(block)
  end

  new(parsed_nodes)
end

.from_html(text) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/nsf/formats/html.rb', line 23

# Parses an HTML string with Nokogiri and converts it into a Document made of
# Heading and Paragraph nodes.
#
# The tree is walked depth-first while text accumulates in a buffer:
#
# * nodes carrying data-nsf-ignore="true" and the <head> element are skipped;
# * text nodes are appended to the buffer;
# * a single <br> acts as a word separator, two consecutive <br>s (optionally
#   separated by whitespace-only text) end the current paragraph;
# * BLOCK_INITIATING_TAGS emit the buffer as a Paragraph once their children
#   have been processed;
# * ENHANCERS tags wrap their content in the configured NSF marker;
# * HEADING_TAGS emit the buffer as a Heading whose level is the tag's digit;
# * every other element is transparent: its children are processed as if they
#   were its siblings.
#
# If the page has a <title> and no level-1 heading was produced, the title is
# prepended as a level-1 Heading.
#
# @param text [String] HTML source
# @return [Document] the converted document (empty when nothing was parsed)
def self.from_html(text)
  blocks = []

  # True after a <br> and until real content follows. It has to live outside
  # the walker so that it survives from one sibling node to the next.
  pending_br = false

  # Collapses every run of whitespace into one space and trims the ends.
  collapse = lambda { |str| str.gsub(/[[:space:]]+/, ' ').strip }

  # Emits the buffer as a Paragraph (when non-blank) and empties it in place.
  flush_paragraph = lambda do |buffer|
    paragraph_text = collapse.call(buffer)
    blocks << Paragraph.new(paragraph_text) if paragraph_text.present?
    buffer.replace("")
  end

  walk = lambda do |node, current_text|
    node_name = node.node_name.downcase

    return if node.attributes.key?("data-nsf-ignore") && node.attributes["data-nsf-ignore"].value == "true"
    return if node_name == 'head'

    if node.text?
      content = node.inner_text
      current_text << content
      # Whitespace between two <br>s must not break the run.
      pending_br = false if content =~ /[^[:space:]]/
      return
    end

    # Handle repeated brs by making a paragraph break.
    if node_name == 'br'
      if pending_br
        flush_paragraph.call(current_text)
        pending_br = false
      else
        # A lone line break still separates words; the extra space is
        # collapsed later if it turns out to be redundant.
        current_text << " "
        pending_br = true
      end
      return
    end

    # Any other element counts as content between line breaks.
    pending_br = false

    # These tags end the paragraph being built once their children are done.
    # NOTE(review): text collected before the tag opens is merged into the
    # same paragraph rather than flushed first -- confirm this is intended.
    # BLOCK_PLAIN_TEXT_TAGS (pre, xmp, ...) get no special treatment here.
    if BLOCK_INITIATING_TAGS.include?(node_name)
      node.children.each { |child| walk.call(child, current_text) }
      flush_paragraph.call(current_text)
      return
    end

    enhancer = ENHANCERS.find { |tags, _marker| Array(tags).include?(node_name) }
    if enhancer
      marker = enhancer.last
      inner_text = "".dup
      node.children.each { |child| walk.call(child, inner_text) }
      current_text << marker << inner_text << marker
      return
    end

    if HEADING_TAGS.include?(node_name)
      node.children.each { |child| walk.call(child, current_text) }

      heading_text = collapse.call(current_text)
      blocks << Heading.new(heading_text, node_name[1..-1].to_i) if heading_text.present?
      current_text.replace("")
      return
    end

    # TEXT_TAGS, BLOCK_PASSTHROUGH_TAGS and unknown elements alike: pretend the
    # children were siblings of this node (move them one level up the tree).
    node.children.each { |child| walk.call(child, current_text) }
  end

  doc = Nokogiri::HTML(text)

  # An empty input yields a document without a root element.
  root = doc.root
  walk.call(root, "".dup) if root

  title_tag = doc.css("title").first
  has_top_heading = blocks.any? { |b| b.is_a?(Heading) && b.level == 1 }
  blocks.unshift(Heading.new(title_tag.inner_text, 1)) if title_tag && !has_top_heading

  Document.new(blocks)
end

.from_nsf(text) ⇒ Object



3
4
5
# File 'lib/nsf/formats/nsf.rb', line 3

# Parses NSF source text: blocks are separated by one blank line ("\n\n") and
# each block is turned into a node by from_blocks.
#
# @param text [String] NSF source
# @return [Object] the result of from_blocks for the split blocks
def self.from_nsf(text)
  raw_blocks = text.split("\n\n")
  from_blocks(raw_blocks)
end

.from_rtf(text) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/nsf/formats/rtf.rb', line 6

# Parses RTF source with RubyRTF and converts it into a Document made of
# Paragraph nodes.
#
# Each parsed section is read as a Hash with a :text string and a :modifiers
# Hash. Non-blank bold text is wrapped in "*" and italic text in "_"; section
# texts are concatenated until a section whose modifiers include :paragraph
# is reached, at which point the accumulated text (whitespace-collapsed)
# becomes a Paragraph. Text left over after the last section is emitted too.
#
# @param text [String] RTF source
# @return [Document] the converted document
def self.from_rtf(text)
  nodes = []
  current_text = "".dup

  # Emits the accumulated text as a Paragraph (when non-blank) and resets it.
  flush_paragraph = lambda do
    paragraph_text = current_text.gsub(/[[:space:]]+/, ' ').strip
    nodes << Paragraph.new(paragraph_text) if paragraph_text.present?
    current_text.clear
  end

  RubyRTF::Parser.new.parse(text).sections.each do |sec|
    modifiers = sec[:modifiers] || {}
    new_text = sec[:text].to_s

    # Never wrap whitespace-only runs, which would yield stray "**" / "__".
    unless new_text.blank?
      new_text = "*#{new_text}*" if modifiers[:bold]
      new_text = "_#{new_text}_" if modifiers[:italic]
    end

    current_text << new_text

    flush_paragraph.call if modifiers[:paragraph]
  end

  # The last section does not necessarily end a paragraph; do not drop it.
  flush_paragraph.call

  Document.new(nodes)
end

.from_text(text) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/nsf/formats/text.rb', line 3

# Converts plain text into a Document by grouping lines into NSF blocks and
# handing them to from_blocks.
#
# * A line starting with one or more "#" followed by a space is a heading:
#   it ends the paragraph being built and becomes a block of its own.
# * A blank line ends the current paragraph.
# * The last line of the input always ends the current paragraph (and is part
#   of it when non-blank).
# * Past the second line of a paragraph, a line indented less than the
#   previous one is appended and then ends the paragraph.
# * Any other line is appended to the current paragraph.
#
# Paragraph text has every run of whitespace collapsed to a single space.
#
# @param text [String] plain-text source
# @return [Object] the result of from_blocks for the collected blocks
def self.from_text(text)
  blocks = []
  current_text = "".dup
  in_paragraph = false
  first_line = true
  prev_line = ""

  # Emits the accumulated text as a block (when non-blank) and leaves the
  # paragraph state.
  flush_paragraph = lambda do
    paragraph_text = current_text.gsub(/[[:space:]]+/, ' ').strip
    blocks << paragraph_text if paragraph_text.present?
    current_text.clear
    in_paragraph = false
  end

  lines = text.split("\n")
  last_index = lines.length - 1

  lines.each_with_index do |line, index|
    # Decide by position, not by content: an earlier line may be textually
    # identical to the last one.
    last_line = index == last_index

    if line =~ /^#+ /
      # Flush first so the heading stays in document order and is never
      # swallowed into the preceding paragraph.
      flush_paragraph.call
      blocks << line
    elsif line.blank? || last_line || (current_text.present? && !first_line && (lsp(line) < lsp(prev_line)))
      if in_paragraph || last_line
        current_text << " " << line unless line.blank?
        flush_paragraph.call
      end
    else
      first_line = !in_paragraph
      in_paragraph = true
      current_text << " " << line
      prev_line = line
    end
  end

  from_blocks(blocks)
end

.lsp(str) ⇒ Object

Returns the number of leading whitespace characters in str (LSP == Leading SPaces).



38
39
40
41
# File 'lib/nsf/formats/text.rb', line 38

# Counts the whitespace characters at the start of the first line of +str+
# that begins with whitespace ("^" anchors at every line start).
#
# @param str [String] the text to inspect
# @return [Integer] length of that whitespace run, or 0 when there is none
def self.lsp(str)
  leading = /^([[:space:]]+)/.match(str)
  leading.nil? ? 0 : leading[1].length
end

Instance Method Details

#titleObject



11
12
13
14
15
16
17
18
19
20
# File 'lib/nsf.rb', line 11

# Picks a title for the document.
#
# Prefers the text of the first level-1 Heading when it is non-blank;
# otherwise falls back to the first node's text, provided that text is
# shorter than 100 characters.
#
# @return [String, nil] the chosen title, or nil when neither rule applies
def title
  top_heading = nodes.find { |node| node.is_a?(Heading) && node.level == 1 }
  return top_heading.text if top_heading && top_heading.text.present?

  lead_node = nodes.first
  lead_node.text if lead_node && lead_node.text.length < 100
end

#to_htmlObject



112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/nsf/formats/html.rb', line 112

# Renders the document as a complete HTML page: a <head> holding the charset
# and the document title, and a <body> holding every node's to_html output
# joined without separators.
#
# @return [String] the HTML page
def to_html
  # The title is plain node text, so the characters that are significant in
  # HTML element content are escaped before it is embedded. A nil title
  # renders as an empty <title>.
  escaped_title = title.to_s.gsub(/[&<>]/, { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;" })

  <<-EOF
<!doctype html>
<html>
  <head>
<meta charset="utf-8">
<title>#{escaped_title}</title>
  </head>
  <body>
#{nodes.map(&:to_html).join}
  </body>
</html>
EOF
end

#to_nsfObject



7
8
9
# File 'lib/nsf/formats/nsf.rb', line 7

# Serializes the document as NSF: every node's to_nsf output, separated by
# one blank line.
#
# @return [String] the NSF text ("" for a document without nodes)
def to_nsf
  rendered_blocks = nodes.map { |node| node.to_nsf }
  rendered_blocks.join("\n\n")
end

#to_pdf(base_font_size = PDF_DEFAULT_FONT_SIZE) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/nsf/formats/pdf.rb', line 9

# Renders the document as a PDF using Prawn.
#
# Creates an A4 Prawn document whose margin and default leading are derived
# from the base font size, registers the "Open Sans" family from the "fonts"
# directory next to this file, lets every node draw itself via to_pdf(pdf),
# and returns the result of pdf.render.
#
# @param base_font_size [Numeric] font size set on the PDF; also scales the
#   page margin (x 2.22222, rounded) and the leading (x PDF_LEADING, rounded)
# @return [Object] whatever Prawn::Document#render returns
def to_pdf(base_font_size = PDF_DEFAULT_FONT_SIZE)
  page_margin = (base_font_size * 2.22222).round
  pdf = Prawn::Document.new(:page_size => "A4", :margin => page_margin)

  font_dir = "#{File.dirname(__FILE__)}/fonts"
  style_files = {
    :normal => "Regular",
    :bold => "Bold",
    :italic => "Italic",
    :bold_italic => "BoldItalic"
  }
  open_sans = {}
  style_files.each do |style, suffix|
    open_sans[style] = "#{font_dir}/OpenSans-#{suffix}.ttf"
  end
  pdf.font_families.update("Open Sans" => open_sans)

  pdf.font("Open Sans")
  pdf.font_size = base_font_size
  pdf.default_leading = (PDF_LEADING * base_font_size).round

  nodes.each { |node| node.to_pdf(pdf) }

  pdf.render
end

#to_rtfObject



31
32
33
34
35
36
37
38
39
# File 'lib/nsf/formats/rtf.rb', line 31

# Renders the document as RTF.
#
# Creates an RTF document with a Times New Roman (roman family) font, appends
# each node's to_rtf output to a paragraph obtained from doc.paragraph (one
# call per node), and returns the document's own to_rtf output.
#
# @return [Object] whatever RTF::Document#to_rtf returns
def to_rtf
  base_font = RTF::Font.new(RTF::Font::ROMAN, 'Times New Roman')
  doc = RTF::Document.new(base_font)

  nodes.each { |node| doc.paragraph << node.to_rtf }

  doc.to_rtf
end

#tocObject



22
23
24
# File 'lib/nsf.rb', line 22

# Returns the document's table of contents: every node that is an
# Nsf::Heading, in document order.
#
# @return [Array] the heading nodes (empty when there are none)
def toc
  nodes.grep(Nsf::Heading)
end