Class: BookmarkMachine::BookmarkDocument

Inherits:
Nokogiri::XML::SAX::Document
  • Object
show all
Defined in:
lib/bookmark_machine/netscape_parser.rb

Overview

:nodoc: BookmarkDocument implements SAX callbacks for parsing messy bookmark files. It turns out that a SAX parser is more resilient to bizarre inputs than the typical Nokogiri parser since it doesn’t bother itself with the document structure.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ BookmarkDocument

Returns a new instance of BookmarkDocument.



37
38
39
40
41
42
43
44
45
# File 'lib/bookmark_machine/netscape_parser.rb', line 37

# Sets up an empty document: no bookmark in progress, no collected bookmarks
# and an empty folder stack. The text buffer and parser state are then
# cleared through reset_state.
def initialize
  super

  @current_bookmark = nil
  @bookmarks = []
  @folders = []

  reset_state
end

Instance Attribute Details

#bookmarks ⇒ Object (readonly)

Returns the value of attribute bookmarks.



35
36
37
# File 'lib/bookmark_machine/netscape_parser.rb', line 35

# Reader for the bookmarks collected so far.
#
# Returns the @bookmarks array itself (not a copy).
def bookmarks
  @bookmarks
end

Instance Method Details

#characters(string) ⇒ Object



71
72
73
# File 'lib/bookmark_machine/netscape_parser.rb', line 71

# SAX callback for character data. The text is appended to the @text buffer
# only while a state is active (@state is truthy); otherwise it is ignored
# and nil is returned.
def characters(string)
  return unless @state

  @text << string
end

#done ⇒ Object



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/bookmark_machine/netscape_parser.rb', line 96

# Flushes the item currently being collected, depending on @state:
#
# * :bookmark    - names @current_bookmark from the buffered text and appends
#                  it to @bookmarks.
# * :folder      - pushes the buffered text onto the @folders stack.
# * :description - sets the buffered text as the description of the most
#                  recently recorded bookmark, unless the text is blank.
#
# Each handled state finishes with reset_state. When no state is active this
# method does nothing.
def done
  case @state
  when :bookmark
    @current_bookmark.name = @text.strip
    @bookmarks << @current_bookmark
    @current_bookmark = nil
    reset_state

  when :folder
    @folders << @text.strip
    reset_state

  when :description
    description = @text.strip
    bookmark = @bookmarks.last
    # A DD can appear before any bookmark has been recorded (for example a
    # description that follows a folder heading). There is nothing to attach
    # it to, so drop it rather than calling a setter on nil.
    bookmark.description = description if bookmark && !description.empty?
    reset_state

  end
end

#end_document ⇒ Object



67
68
69
# File 'lib/bookmark_machine/netscape_parser.rb', line 67

# End-of-document callback. Delegates to done so that an item still being
# collected when the input ends is flushed.
def end_document
  done
end

#end_element(name, attrs = []) ⇒ Object

Only one closing element has semantic meaning: a closing DL, which indicates the end of a folder.



60
61
62
63
64
65
# File 'lib/bookmark_machine/netscape_parser.rb', line 60

# SAX callback for a closing tag. A closing "dl" ends the current folder
# (pop_folder); every other closing tag just flushes the pending item (done).
# attrs is accepted for signature compatibility and is not used.
def end_element(name, attrs = [])
  if name == "dl"
    pop_folder
  else
    done
  end
end

#epoch_time(seconds) ⇒ Object

Converts from epoch seconds to a Time object. Returns nil on a nil input.



128
129
130
# File 'lib/bookmark_machine/netscape_parser.rb', line 128

# Converts epoch seconds (an Integer, or a numeric String such as
# "1300000000") to a Time object.
#
# Returns nil for nil input, and also for input that has no leading digits
# (for example an empty ADD_DATE=""). Without that check String#to_i would
# turn such input into 0 and the bookmark would be dated 1970-01-01.
def epoch_time(seconds)
  return unless seconds
  # Same leniency as String#to_i (leading whitespace, optional sign), but
  # require at least one digit so blank or junk values are not read as 0.
  return unless seconds.to_s =~ /\A\s*[-+]?\d/

  Time.at(seconds.to_i)
end

#pop_folder ⇒ Object



116
117
118
119
# File 'lib/bookmark_machine/netscape_parser.rb', line 116

# Leaves the current folder: removes the last entry from the @folders stack
# and then calls done to flush any item still being collected.
def pop_folder
  # Array#pop on an empty stack returns nil, so an unmatched close is harmless.
  @folders.pop
  done
end

#reset_state ⇒ Object



121
122
123
124
# File 'lib/bookmark_machine/netscape_parser.rb', line 121

# Clears the parsing state: replaces the text buffer with a new empty string
# and sets @state to nil. While @state is nil, characters ignores incoming
# text.
def reset_state
  @text = ""
  @state = nil
end

#start_bookmark(attrs) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/bookmark_machine/netscape_parser.rb', line 75

# Begins a new bookmark from the attributes of an A element.
#
# attrs - name/value pairs, converted with Hash[]. Keys read: 'href',
#         'add_date', 'last_modified', 'icon', 'icon_uri' and 'tags'.
#
# The new bookmark is stored in @current_bookmark together with a copy of the
# current folder stack, and @state is set to :bookmark.
def start_bookmark(attrs)
  attributes = Hash[attrs]
  added      = attributes['add_date']
  modified   = attributes['last_modified'] || added

  bookmark = Bookmark.new(attributes['href'])
  @current_bookmark = bookmark

  bookmark.created_at = epoch_time(added)
  # updated_at falls back to add_date when last_modified is absent.
  bookmark.updated_at = epoch_time(modified)
  bookmark.icon       = attributes['icon'] || attributes['icon_uri']
  bookmark.tags       = tagged_text(attributes['tags'])
  # Clone so later pushes/pops on @folders do not alter this bookmark.
  bookmark.folders    = @folders.clone

  @state = :bookmark
end

#start_description(attrs) ⇒ Object



92
93
94
# File 'lib/bookmark_machine/netscape_parser.rb', line 92

# Enters the :description state so that following character data is buffered
# as description text. attrs is not used.
def start_description(attrs)
  @state = :description
end

#start_element(name, attrs = []) ⇒ Object

Only three elements have semantic meaning: A, H3, and DD, representing Bookmarks, Folder names, and Descriptions, respectively.



49
50
51
52
53
54
55
56
# File 'lib/bookmark_machine/netscape_parser.rb', line 49

# SAX callback for an opening tag. Dispatches on the tag name:
# "a" starts a bookmark, "h3" starts a folder name, "dd" starts a
# description. Any other tag flushes the pending item through done.
def start_element(name, attrs = [])
  if name == "a"
    start_bookmark(attrs)
  elsif name == "h3"
    start_folder(attrs)
  elsif name == "dd"
    start_description(attrs)
  else
    done
  end
end

#start_folder(attrs) ⇒ Object



88
89
90
# File 'lib/bookmark_machine/netscape_parser.rb', line 88

# Enters the :folder state so that following character data is buffered as
# the folder name. attrs is not used.
def start_folder(attrs)
  @state = :folder
end

#tagged_text(str) ⇒ Object



132
133
134
# File 'lib/bookmark_machine/netscape_parser.rb', line 132

# Splits a comma-separated tag string into an array of tag names.
#
# Whitespace around each tag is stripped, and blank entries (from leading or
# doubled commas, e.g. ",a,,b") are dropped so they do not appear as
# empty-string tags. Returns nil when str is nil.
def tagged_text(str)
  return unless str

  str.split(",").map(&:strip).reject(&:empty?)
end