Class: Nsf::Document
- Defined in:
- lib/nsf.rb,
lib/nsf/formats/nsf.rb,
lib/nsf/formats/pdf.rb,
lib/nsf/formats/rtf.rb,
lib/nsf/formats/html.rb,
lib/nsf/formats/text.rb
Constant Summary collapse
- PDF_DEFAULT_FONT_SIZE =
10.5
- PDF_LEADING =
0.4
- CONFORMING_TEXT_TAGS =
These tags should be recursively replaced by their contents and the resulting content appended to the current paragraph
%w(a abbr b bdi bdo cite code command datalist del dfn em i img ins kbd label mark math meter noscript output q ruby s samp small span strong sub sup textarea time var wbr)
- NONCONFORMING_TEXT_TAGS =
%w(acronym big center dir font listing plaintext spacer strike tt u xmp)
- TEXT_TAGS =
CONFORMING_TEXT_TAGS + NONCONFORMING_TEXT_TAGS
- HEADING_TAGS =
%w(h1 h2 h3 h4 h5 h6)
- BLOCK_PASSTHROUGH_TAGS =
%w(div dl form ol table tbody thead tfoot tr ul)
- BLOCK_INITIATING_TAGS =
%w(article aside body blockquote dd dt header li nav p pre section td th ul)
- BLOCK_PLAIN_TEXT_TAGS =
%w(pre plaintext listing xmp)
- ENHANCERS =
{ %w(b strong) => "*", %w(i em) => "_" }
Instance Attribute Summary collapse
-
#nodes ⇒ Object
Returns the value of attribute nodes.
Class Method Summary collapse
- .from(text, format) ⇒ Object
- .from_blocks(blocks) ⇒ Object
- .from_html(text) ⇒ Object
- .from_nsf(text) ⇒ Object
- .from_rtf(text) ⇒ Object
- .from_text(text) ⇒ Object
-
.lsp(str) ⇒ Object
LSP == Leading SPaces.
Instance Method Summary collapse
-
#initialize(nodes) ⇒ Document
constructor
A new instance of Document.
- #title ⇒ Object
- #to_html ⇒ Object
- #to_nsf ⇒ Object
- #to_pdf(base_font_size = PDF_DEFAULT_FONT_SIZE) ⇒ Object
- #to_rtf ⇒ Object
- #toc ⇒ Object
Constructor Details
#initialize(nodes) ⇒ Document
Returns a new instance of Document.
7 8 9 |
# File 'lib/nsf.rb', line 7
# Builds a document around an already-assembled collection of nodes.
#
# @param nodes [Object] the node collection; stored as given, without copying
def initialize(nodes)
  @nodes = nodes
end
Instance Attribute Details
#nodes ⇒ Object
Returns the value of attribute nodes.
5 6 7 |
# File 'lib/nsf.rb', line 5
# Reader for the node collection held in @nodes.
#
# @return [Object] the current value of @nodes
def nodes
  @nodes
end
Class Method Details
.from(text, format) ⇒ Object
26 27 28 |
# File 'lib/nsf.rb', line 26
# Dispatches to the class method named +from_<format>+, passing +text+ through.
#
# +public_send+ is used instead of +send+ so that a caller-supplied format can
# only reach public parsers, never a private method whose name merely starts
# with +from_+.
#
# @param text [Object] input handed unchanged to the selected parser
# @param format [#to_s] parser suffix, interpolated into the method name
# @return [Object] whatever the selected parser returns
# @raise [NoMethodError] when no public +from_<format>+ method exists
def self.from(text, format)
  public_send("from_#{format}", text)
end
.from_blocks(blocks) ⇒ Object
30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/nsf.rb', line 30
# Builds a document from NSF text blocks, choosing a node class per block.
#
# A block with a line starting with one or more '#' followed by a space goes to
# Heading, a block with a line starting with a space goes to Fixedblock, and
# anything else goes to Paragraph. Each class parses the block via +from_nsf+.
#
# @param blocks [#map] the blocks to convert, in document order
# @return [Object] a new instance wrapping the converted nodes
def self.from_blocks(blocks)
  parsed = blocks.map do |block|
    node_class =
      if block =~ /^#+ /
        Heading
      elsif block =~ /^ /
        Fixedblock
      else
        Paragraph
      end
    node_class.from_nsf(block)
  end

  new(parsed)
end
.from_html(text) ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# File 'lib/nsf/formats/html.rb', line 23
# Parses HTML with Nokogiri and flattens it into Heading and Paragraph nodes.
#
# The tree is walked depth-first while text accumulates in a mutable buffer.
# Block-initiating tags, heading tags and a doubled <br> turn the buffer into a
# node and empty it. Elements carrying data-nsf-ignore="true" and the <head>
# element are skipped. If no level-1 heading was produced, the <title> text is
# inserted as one at the front.
#
# @param text [String] HTML source
# @return [Document] document built from the collected nodes
def self.from_html(text)
  blocks = []

  # Shared by every visit. It must live outside the walker: consecutive <br>
  # elements are siblings, so a flag local to one visit could never be seen
  # by the next one.
  just_appended_br = false

  # Collapses whitespace runs in the buffer, empties the buffer in place and
  # returns the collapsed text. Starting a fresh buffer also ends any <br> run.
  take_text = lambda do |buffer|
    collapsed = buffer.gsub(/[[:space:]]+/, ' ').strip
    buffer.replace("")
    just_appended_br = false
    collapsed
  end

  iterate = lambda do |node, current_text|
    node_name = node.node_name.downcase

    return if node.attributes.key?("data-nsf-ignore") && node.attributes["data-nsf-ignore"].value == "true"
    return if node_name == 'head'

    if node.text?
      content = node.inner_text
      current_text << content
      # Whitespace-only text between two <br> tags must not break the run.
      just_appended_br = false if content =~ /[^[:space:]]/
      return
    end

    # Handle repeated brs by making a paragraph break
    if node_name == 'br'
      if just_appended_br
        paragraph_text = take_text.call(current_text)
        blocks << Paragraph.new(paragraph_text) if paragraph_text.present?
      else
        just_appended_br = true
      end
      return
    end

    # These tags terminate the current paragraph, if present, and start a new paragraph
    if BLOCK_INITIATING_TAGS.include?(node_name)
      node.children.each { |child| iterate.call(child, current_text) }
      paragraph_text = take_text.call(current_text)
      blocks << Paragraph.new(paragraph_text) if paragraph_text.present?
      # NOTE(review): special handling of BLOCK_PLAIN_TEXT_TAGS (routing the text
      # through Nsf::Document.from_text) was commented out here and is still not done.
      return
    end

    # Emphasis tags: collect the children into a separate buffer, then wrap it
    # in the NSF marker on both sides.
    enhancer = ENHANCERS.find { |tags, _marker| Array(tags).include?(node_name) }
    if enhancer
      nsf_rep = enhancer.last
      new_text = ""
      node.children.each { |child| iterate.call(child, new_text) }
      current_text << nsf_rep << new_text << nsf_rep
      return
    end

    if HEADING_TAGS.include?(node_name)
      node.children.each { |child| iterate.call(child, current_text) }
      heading_text = take_text.call(current_text)
      # The digit after "h" is the heading level.
      blocks << Heading.new(heading_text, node_name[1..-1].to_i) if heading_text.present?
      return
    end

    # TEXT_TAGS, BLOCK_PASSTHROUGH_TAGS and unrecognised elements are all handled
    # the same way: pretend that the children of this node were siblings of this
    # node (move them one level up the tree).
    node.children.each { |child| iterate.call(child, current_text) }
  end

  doc = Nokogiri::HTML(text)
  # No root element means there is nothing to walk.
  iterate.call(doc.root, "") if doc.root

  title_tag = doc.css("title").first
  if title_tag && blocks.none? { |b| b.is_a?(Heading) && b.level == 1 }
    blocks.unshift(Heading.new(title_tag.inner_text, 1))
  end

  Document.new(blocks)
end
.from_nsf(text) ⇒ Object
3 4 5 |
# File 'lib/nsf/formats/nsf.rb', line 3
# Parses NSF text: blocks are separated by exactly two newlines and each block
# is then converted by +from_blocks+.
#
# @param text [String] NSF source
# @return [Object] the result of +from_blocks+
def self.from_nsf(text)
  blocks = text.split("\n\n")
  from_blocks(blocks)
end
.from_rtf(text) ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/nsf/formats/rtf.rb', line 6
# Parses RTF with RubyRTF and converts its sections into Paragraph nodes.
#
# Section text accumulates until a section whose modifiers include :paragraph
# is seen; the accumulated text is then whitespace-collapsed and stored as a
# Paragraph. Non-blank bold sections are wrapped in "*" and italic ones in "_".
#
# @param text [String] RTF source
# @return [Document] document holding the collected paragraphs
def self.from_rtf(text)
  nodes = []
  current_text = ""

  # Turns the accumulated text into a Paragraph (when non-blank) and starts over.
  flush_paragraph = lambda do
    paragraph_text = current_text.gsub(/[[:space:]]+/, ' ').strip
    nodes << Paragraph.new(paragraph_text) if paragraph_text.present?
    current_text = ""
  end

  RubyRTF::Parser.new.parse(text).sections.each do |sec|
    new_text = sec[:text]

    # Only decorate sections that carry visible text, so whitespace-only runs
    # do not produce empty "**" or "__" markers.
    unless new_text.gsub(/[[:space:]]+/, ' ').blank?
      new_text = "*#{new_text}*" if sec[:modifiers][:bold]
      new_text = "_#{new_text}_" if sec[:modifiers][:italic]
    end

    current_text << new_text
    flush_paragraph.call if sec[:modifiers][:paragraph]
  end

  # Text after the last paragraph marker would otherwise be lost.
  flush_paragraph.call

  Document.new(nodes)
end
.from_text(text) ⇒ Object
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/nsf/formats/text.rb', line 3
# Splits plain text into paragraph and heading blocks and hands them to
# +from_blocks+.
#
# A paragraph ends at a blank line, at the final line of the input, or at a
# line indented less than the previous one (only once the paragraph has more
# than one line). Lines starting with one or more '#' and a space are passed
# through as their own block.
#
# The final line is identified by position, not by content, so an earlier line
# that happens to have the same text as the last one no longer ends a paragraph.
#
# @param text [String] plain-text source
# @return [Object] the result of +from_blocks+
def self.from_text(text)
  blocks = []
  in_paragraph = false
  first_line = true
  current_text = ""
  prev_line = ""

  lines = text.split("\n")
  last_index = lines.length - 1

  lines.each_with_index do |line, index|
    last_line = (index == last_index)

    if line.blank? || last_line || (current_text.present? && !first_line && (lsp(line) < lsp(prev_line)))
      if in_paragraph || last_line
        in_paragraph = false
        # A terminating line that carries text still belongs to the paragraph.
        current_text << " " << line unless line.blank?

        unless current_text.empty?
          paragraph_text = current_text.gsub(/[[:space:]]+/, ' ').strip
          blocks << paragraph_text if paragraph_text.present?
          current_text = ""
        end
      end
    elsif line =~ /^#+ /
      blocks << line
    else
      # first_line is true only while the paragraph has exactly one line; the
      # indentation comparison above is skipped until a second line is seen.
      first_line = !in_paragraph
      in_paragraph = true
      current_text << " " << line
      prev_line = line
    end
  end

  from_blocks(blocks)
end
.lsp(str) ⇒ Object
LSP == Leading SPaces
38 39 40 41 |
# File 'lib/nsf/formats/text.rb', line 38
# Counts the whitespace characters at the very start of +str+.
#
# The pattern is anchored with \A (start of string) rather than ^ (start of any
# line), so indentation of a later line inside a multi-line string is never
# counted.
#
# @param str [String] the string to inspect
# @return [Integer] number of leading whitespace characters, 0 when there are none
def self.lsp(str)
  str[/\A[[:space:]]+/].to_s.length
end
Instance Method Details
#title ⇒ Object
11 12 13 14 15 16 17 18 19 20 |
# File 'lib/nsf.rb', line 11
# Picks a title for the document.
#
# The text of the first level-1 Heading wins when it is present. Otherwise the
# text of the first node is used, provided it is shorter than 100 characters.
#
# @return [Object, nil] the chosen text, or nil when neither rule applies
def title
  heading = nodes.detect { |node| node.is_a?(Heading) && node.level == 1 }
  return heading.text if heading && heading.text.present?

  lead = nodes.first
  lead.text if lead && (lead.text.length < 100)
end
#to_html ⇒ Object
112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
# File 'lib/nsf/formats/html.rb', line 112
# Renders the document as a complete HTML5 page.
#
# The title is HTML-escaped before being placed inside <title>, so characters
# such as "&" or "<" in a heading cannot break or inject markup. Node markup
# comes from each node's own +to_html+ and is concatenated as-is.
#
# @return [String] the HTML page
def to_html
  # Loaded here because no file-level require list is visible for this method;
  # ERB ships with Ruby and repeated requires are no-ops.
  require 'erb'

  <<-EOF
<!doctype html>
<html>
  <head>
    <meta charset="utf-8">
    <title>#{ERB::Util.html_escape(title)}</title>
  </head>
  <body>
    #{nodes.map(&:to_html).join}
  </body>
</html>
  EOF
end
#to_nsf ⇒ Object
7 8 9 |
# File 'lib/nsf/formats/nsf.rb', line 7
# Serialises the document to NSF: each node's +to_nsf+ output, separated by a
# blank line.
#
# @return [String] the NSF text
def to_nsf
  rendered = nodes.map { |node| node.to_nsf }
  rendered.join("\n\n")
end
#to_pdf(base_font_size = PDF_DEFAULT_FONT_SIZE) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/nsf/formats/pdf.rb', line 9
# Renders the document to PDF through Prawn on an A4 page.
#
# @param base_font_size [Numeric] body font size; the page margin and the
#   default leading are both derived from it
# @return [Object] the result of Prawn's +render+
def to_pdf(base_font_size = PDF_DEFAULT_FONT_SIZE)
  page_margin = (base_font_size * 2.22222).round
  pdf = Prawn::Document.new(:page_size => "A4", :margin => page_margin)

  # The Open Sans faces are looked up in a "fonts" directory next to this file.
  font_dir = "#{File.dirname(__FILE__)}/fonts"
  open_sans = {}
  { :normal => "Regular", :bold => "Bold", :italic => "Italic", :bold_italic => "BoldItalic" }.each do |style, face|
    open_sans[style] = "#{font_dir}/OpenSans-#{face}.ttf"
  end
  pdf.font_families.update("Open Sans" => open_sans)

  pdf.font "Open Sans"
  pdf.font_size = base_font_size
  pdf.default_leading = (PDF_LEADING * base_font_size).round

  # Each node draws itself onto the shared Prawn document.
  nodes.each { |node| node.to_pdf(pdf) }

  pdf.render
end
#to_rtf ⇒ Object
31 32 33 34 35 36 37 38 39 |
# File 'lib/nsf/formats/rtf.rb', line 31
# Renders the document to RTF using a Times New Roman roman font.
#
# Every node's +to_rtf+ output is appended to a paragraph obtained from the
# RTF document.
#
# @return [Object] the result of the RTF document's +to_rtf+
def to_rtf
  font = RTF::Font.new(RTF::Font::ROMAN, 'Times New Roman')
  rtf = RTF::Document.new(font)

  nodes.each { |node| rtf.paragraph << node.to_rtf }

  rtf.to_rtf
end