Class: WordToMarkdown::Document

Inherits:
Object
  • Object
show all
Defined in:
lib/word-to-markdown/document.rb

Defined Under Namespace

Classes: ConverstionError, NotFoundError

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path, tmpdir = nil) ⇒ Document

Returns a new instance of Document.

Raises:

  • (NotFoundError) — if no file exists at the expanded path



9
10
11
12
13
# File 'lib/word-to-markdown/document.rb', line 9

# Set up a document for the file at the given path
#
# path   - path to the source file; expanded against the current working directory
# tmpdir - optional working directory; when nil, one is created with Dir.mktmpdir
#
# Raises NotFoundError if no file exists at the expanded path
def initialize(path, tmpdir = nil)
  @path = File.expand_path path, Dir.pwd
  # Check the path before touching the filesystem, so a missing file does not
  # leave a freshly created temporary directory behind.
  raise NotFoundError, "File #{@path} does not exist" unless File.exist?(@path)

  @tmpdir = tmpdir || Dir.mktmpdir
end

Instance Attribute Details

#path ⇒ Object (readonly)

Returns the value of attribute path.



7
8
9
# File 'lib/word-to-markdown/document.rb', line 7

# Returns the value of @path, which the constructor sets to the given path
# expanded against the current working directory
def path
  @path
end

#tmpdir ⇒ Object (readonly)

Returns the value of attribute tmpdir.



7
8
9
# File 'lib/word-to-markdown/document.rb', line 7

# Returns the value of @tmpdir: the directory handed to the constructor, or the
# one it created with Dir.mktmpdir when none was given
def tmpdir
  @tmpdir
end

Instance Method Details

#encoding ⇒ Object

Determine the document encoding

Reads the charset declaration from the raw html export (raw_html); the method takes no arguments

Returns the encoding, defaulting to “UTF-8”



42
43
44
45
46
47
48
49
# File 'lib/word-to-markdown/document.rb', line 42

# Determine the document encoding from the charset declaration in the raw html export
#
# Reads raw_html. The charset value may be unquoted, single-quoted or
# double-quoted, and the declaration is matched case-insensitively.
#
# Returns the encoding name as a String, defaulting to "UTF-8"
def encoding
  # Invalid byte sequences are replaced with an empty string before matching.
  sanitized = raw_html.encode("UTF-8", :invalid => :replace, :replace => "")

  # Capture stops at a quote, whitespace, ';', '/' or '>' so that markup
  # following the declaration is never swallowed into the encoding name.
  match = sanitized.match(/charset\s*=\s*["']?([^"'\s;\/>]+)/i)
  return "UTF-8" unless match

  # "macintosh" is rewritten to "MacRoman".
  match[1].sub(/macintosh/i, "MacRoman")
end

#extension ⇒ Object



15
16
17
# File 'lib/word-to-markdown/document.rb', line 15

# Returns the extension of the document's path as reported by File.extname
# (leading dot included, e.g. ".docx")
def extension
  File.extname(path)
end

#html ⇒ Object

Returns the html representation of the document



28
29
30
# File 'lib/word-to-markdown/document.rb', line 28

# Returns the html representation of the document: the parsed tree serialized
# with to_html, with the newline after each closing </li> tag removed
def html
  serialized = tree.to_html
  serialized.gsub("</li>\n", "</li>")
end

#to_s ⇒ Object

Returns the markdown representation of the document



33
34
35
# File 'lib/word-to-markdown/document.rb', line 33

# Returns the markdown representation of the document
#
# The html is converted with ReverseMarkdown using
# WordToMarkdown::REVERSE_MARKDOWN_OPTIONS and passed through scrub_whitespace.
# The result is cached in @markdown, so later calls return the same value.
def to_s
  return @markdown if @markdown

  converted = ReverseMarkdown.convert(html, WordToMarkdown::REVERSE_MARKDOWN_OPTIONS)
  @markdown = scrub_whitespace(converted)
end

#tree ⇒ Object



19
20
21
22
23
24
25
# File 'lib/word-to-markdown/document.rb', line 19

# Returns the document parsed by Nokogiri::HTML from normalized_html, with any
# <title> elements removed. The parsed document is cached in @tree.
def tree
  return @tree if @tree

  document = Nokogiri::HTML(normalized_html)
  document.css("title").remove
  @tree = document
end