Class: EPUB::Search::Database

Inherits:
Object
  • Object
show all
Includes:
MonitorMixin
Defined in:
lib/epub/search/database.rb,
lib/epub/search/database/actor.rb

Defined Under Namespace

Classes: Actor

Constant Summary collapse

DIR_NAME =
'db'
FILE_NAME =
'epub-search.db'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(db_dir) ⇒ Database

Returns a new instance of Database.



13
14
15
16
17
# File 'lib/epub/search/database.rb', line 13

# Creates a database handle rooted at +db_dir+ and sets the default Groonga
# context options to UTF-8 encoding.
#
# @param db_dir [Pathname, String] directory of the database; anything that
#   is not already a Pathname is wrapped with Pathname.new
def initialize(db_dir)
  super()
  @db_dir =
    case db_dir
    when Pathname then db_dir
    else Pathname.new(db_dir)
    end
  Groonga::Context.default_options = { encoding: :utf8 }
end

Instance Attribute Details

#db_dir ⇒ Object (readonly)

Returns the value of attribute db_dir.



11
12
13
# File 'lib/epub/search/database.rb', line 11

# Reader for the database directory.
#
# @return [Pathname] the directory stored by the constructor
def db_dir
  @db_dir
end

Instance Method Details

#add(file_path) ⇒ Integer

Returns the number of added records.

Parameters:

  • file_path (Pathname|String)

    path of book

Returns:

  • (Integer)

    the number of added records



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/epub/search/database.rb', line 53

# Parses the EPUB at +file_path+ and adds one record to the pages table for
# each content document whose media type is application/xhtml+xml.
#
# @param file_path [Pathname, String] path of book
# @return [Integer] the number of added records
def add(file_path)
  file_path = Pathname.new(file_path) unless file_path.kind_of? Pathname
  location = file_path.expand_path
  book = EPUB::Parser.parse(location)
  contents = book.each_content.select {|content|
    content.media_type == 'application/xhtml+xml'
  }
  contents.each do |content|
    doc = Nokogiri.XML(content.read)
    title_elem = doc.search('title').first
    page_title = title_elem ? title_elem.text : ''
    # A document without a <body> is indexed with empty content; calling
    # #to_xml on nil here would abort the book after earlier pages had
    # already been written.
    body_elem = doc.search('body').first
    body = body_elem ? Nokogiri.XML(body_elem.to_xml).content : ''
    synchronize do
      open do
        pages.add('location'   => location.to_s,
                  'iri'        => content.href.to_s,
                  'book_title' => book.title,
                  'page_title' => page_title,
                  'content'    => body)
      end
    end
  end
  contents.length
end

#books(path = false) ⇒ Object



113
114
115
116
117
118
119
120
121
# File 'lib/epub/search/database.rb', line 113

# Collects one title per distinct location found in the pages table.
#
# @param path [Boolean] when truthy, " - <location>" is appended to each title
# @return [Object] whatever +open+ returns; the block handed to it evaluates
#   to an Array with one entry per location
def books(path=false)
  open do
    pages.group_by(&:location).collect {|location, records|
      title = records.first.book_title
      # Interpolate into a new string instead of appending with <<, so the
      # value handed back by the record is never modified and a frozen or
      # missing title cannot raise.
      path ? "#{title} - #{location}" : title
    }
  end
end

#db_file ⇒ Object



19
20
21
# File 'lib/epub/search/database.rb', line 19

# Path of the database file: FILE_NAME resolved against the database
# directory. The value is computed on first use and cached afterwards.
#
# @return [Pathname]
def db_file
  return @db_file if @db_file

  @db_file = @db_dir.join(FILE_NAME)
end

#init(force = false) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/epub/search/database.rb', line 27

# Creates the database directory and file and defines the schema.
#
# @param force [Boolean] when truthy, an existing database directory is
#   removed before it is created again
def init(force=false)
  if force && @db_dir.exist?
    @db_dir.rmtree
  end
  @db_dir.mkpath
  Groonga::Database.create(path: db_file.to_path)

  # 'Pages' is an array table with six text columns:
  #   location - file path or URI
  #   iri      - inner IRI
  Groonga::Schema.create_table('Pages', type: :array)
  Groonga::Schema.change_table('Pages') do |pages_table|
    %w[location iri book_title page_title metadata content].each do |column|
      pages_table.text(column)
    end
  end

  # 'Terms' is a patricia-trie table indexing three of the Pages columns.
  Groonga::Schema.create_table('Terms',
                               type: :patricia_trie,
                               normalizer: :NormalizerAuto,
                               default_tokenizer: 'TokenBigram')
  Groonga::Schema.change_table('Terms') do |terms_table|
    %w[Pages.book_title Pages.metadata Pages.content].each do |target|
      terms_table.index(target)
    end
  end
end

#locations ⇒ Object



123
124
125
126
127
# File 'lib/epub/search/database.rb', line 123

# Lists each distinct location present in the pages table, in order of first
# appearance.
#
# @return [Object] whatever +open+ returns; the block handed to it evaluates
#   to an Array of locations
def locations
  open { pages.group_by(&:location).keys }
end

#pages ⇒ Object



23
24
25
# File 'lib/epub/search/database.rb', line 23

# Looks up the object named 'Pages' through Groonga[] — the same name #init
# gives to the table it creates.
#
# @return [Object] the result of Groonga['Pages']
def pages
  Groonga['Pages']
end

#remove(file_path) ⇒ Integer

Returns the number of removed records.

Parameters:

  • file_path (Pathname|String)

    path of book

Returns:

  • (Integer)

    the number of removed records



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/epub/search/database.rb', line 80

# Deletes every page record whose location equals the expanded +file_path+.
#
# @param file_path [Pathname, String] path of book
# @return [Integer] the number of removed records
def remove(file_path)
  book_path = file_path.kind_of?(Pathname) ? file_path : Pathname.new(file_path)
  target = book_path.expand_path.to_path
  removed = 0
  synchronize do
    open do
      matches = pages.select { |record| record.location == target }
      matches.each do |match|
        # The key of a selected record is the object that gets deleted.
        match.key.delete
        removed += 1
      end
    end
  end
  removed
end

#search(word, book = nil) {|result| ... } ⇒ Object

Parameters:

  • word (String)

    search word

  • book (String) (defaults to: nil)

    book title

Yield Parameters:

  • result (Hash)

    Search result. The key is the file path and the value is an array of records.



102
103
104
105
106
107
108
109
110
111
# File 'lib/epub/search/database.rb', line 102

# Selects page records whose content matches +word+ (and, when +book+ is
# given, whose book title matches +book+), groups them by location and
# yields the grouped result.
#
# @param word [String] search word
# @param book [String] book title
# @yieldparam result [Hash] the key is the location and the value is an
#   array of records
def search(word, book=nil)
  open do
    matched = pages.select do |record|
      criteria = [record.content =~ word]
      criteria.push(record.book_title =~ book) if book
      criteria
    end
    yield matched.group_by(&:location)
  end
end