Class: Wgit::Database
- Inherits:
-
Object
- Object
- Wgit::Database
- Includes:
- Assertable
- Defined in:
- lib/wgit/database/database.rb
Overview
Class modeling a DB connection and CRUD operations for the Url and Document collections. The most common methods are: insert, update, urls, search, stats, size.
Constant Summary collapse
- LOG_FILE_PATH =
The log file path is relative to the root project folder, not to this file.
"misc/mongo_log.txt"
Constants included from Assertable
Assertable::DEFAULT_DUCK_FAIL_MSG, Assertable::DEFAULT_TYPE_FAIL_MSG, Assertable::WRONG_METHOD_MSG
Instance Method Summary collapse
- #crawled_urls(limit = 0, skip = 0, &block) ⇒ Object
-
#initialize ⇒ Database
constructor
A new instance of Database.
-
#insert(data) ⇒ Object
Inserts the given Url(s) or Document(s) into the database.
- #insert_docs(doc_or_docs) ⇒ Object (also: #insert_doc)
- #insert_urls(url_or_urls) ⇒ Object (also: #insert_url)
-
#search(text, whole_sentence = false, limit = 10, skip = 0, &block) ⇒ Array
Currently all searches are case insensitive.
-
#search_p(text, whole_sentence = false, limit = 10, skip = 0, sentence_length = 80, &block) ⇒ Object
(also: #search_and_format)
Performs a search and pretty prints the results.
- #size ⇒ Object (also: #count, #length)
-
#stats ⇒ Object
Returns a Mongo object which can be used like a Hash to retrieve values.
- #uncrawled_urls(limit = 0, skip = 0, &block) ⇒ Object
-
#update(data) ⇒ Object
Updates the given Url or Document in the database.
- #update_doc(doc) ⇒ Object
- #update_url(url) ⇒ Object
-
#urls(crawled = nil, limit = 0, skip = 0, &block) ⇒ Object
A crawled parameter value of nil (the default) returns all urls.
Methods included from Assertable
#assert_arr_types, #assert_respond_to, #assert_types
Constructor Details
#initialize ⇒ Database
Returns a new instance of Database.
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/wgit/database/database.rb', line 21
#
# Builds the MongoDB client from Wgit::CONNECTION_DETAILS and stores it in
# the @@client class variable. Driver output is sent to a Logger opened on
# LOG_FILE_PATH.
#
# @raise [RuntimeError] if Wgit::CONNECTION_DETAILS is empty.
def initialize
  details = Wgit::CONNECTION_DETAILS
  if details.empty?
    raise "Wgit::CONNECTION_DETAILS must be defined and include :host, :port, :db, :uname, :pword for a database connection to be established."
  end

  mongo_logger = Logger.new(LOG_FILE_PATH)
  host_and_port = format("%s:%s", details[:host], details[:port])

  client_options = {
    :database => details[:db],
    :user => details[:uname],
    :password => details[:pword],
    :logger => mongo_logger,
    :truncate_logs => false
  }
  @@client = Mongo::Client.new([host_and_port], **client_options)
end
Instance Method Details
#crawled_urls(limit = 0, skip = 0, &block) ⇒ Object
103 104 105 |
# File 'lib/wgit/database/database.rb', line 103
#
# Convenience wrapper around #urls that asks only for urls whose crawled
# flag is true.
#
# @param limit [Integer] forwarded to #urls (0 is the default).
# @param skip [Integer] forwarded to #urls.
# @return whatever #urls returns for the same arguments.
def crawled_urls(limit = 0, skip = 0, &block)
  crawled_flag = true
  urls(crawled_flag, limit, skip, &block)
end
#insert(data) ⇒ Object
Inserts the given Url(s) or Document(s) into the database.
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/wgit/database/database.rb', line 40
#
# Inserts data by dispatching on its type: a Url goes to #insert_urls, a
# Document goes to #insert_docs. Anything else that responds to :first is
# treated as a collection and routed according to the type of its first
# element only.
#
# @param data [Url, Document, #first] the record(s) to insert.
# @raise [RuntimeError] if data is none of the above.
def insert(data)
  case data
  when Url
    insert_urls(data)
  when Document
    insert_docs(data)
  else
    unless data.respond_to?(:first)
      raise "data is not in the correct format (all Url's or Document's)"
    end

    # Only the first element is inspected to pick the target collection.
    data.first.is_a?(Url) ? insert_urls(data) : insert_docs(data)
  end
end
#insert_docs(doc_or_docs) ⇒ Object Also known as: insert_doc
69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/wgit/database/database.rb', line 69
#
# Inserts one or more documents into the :documents collection.
#
# Each Document is converted with Wgit::Model.document; a Hash is passed to
# #create unchanged.
#
# @param doc_or_docs [Document, Hash, #map] a single Document/Hash, or a
#   collection of them.
# @return whatever #create returns.
def insert_docs(doc_or_docs)
  # A Hash responds to :map, so it must be recognised as a single record
  # before the duck-type check; otherwise it is treated as a collection.
  if doc_or_docs.is_a?(Hash) || !doc_or_docs.respond_to?(:map)
    assert_type(doc_or_docs, [Document, Hash])
    unless doc_or_docs.is_a?(Hash)
      doc_or_docs = Wgit::Model.document(doc_or_docs)
    end
  else
    assert_arr_types(doc_or_docs, [Document, Hash])
    # Keep Hash elements as they are; mapping them through a bare
    # `unless` modifier would replace them with nil.
    doc_or_docs = doc_or_docs.map do |doc|
      doc.is_a?(Hash) ? doc : Wgit::Model.document(doc)
    end
  end
  create(:documents, doc_or_docs)
end
#insert_urls(url_or_urls) ⇒ Object Also known as: insert_url
56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/wgit/database/database.rb', line 56
#
# Inserts one or more urls into the :urls collection. Every Url is converted
# with Wgit::Model.url before being handed to #create.
#
# @param url_or_urls [Url, #map] a single Url or a collection of Urls.
# @return whatever #create returns.
def insert_urls(url_or_urls)
  if url_or_urls.respond_to?(:map)
    assert_arr_types(url_or_urls, Url)
    payload = url_or_urls.map { |url| Wgit::Model.url(url) }
  else
    assert_type(url_or_urls, Url)
    payload = Wgit::Model.url(url_or_urls)
  end

  create(:urls, payload)
end
#search(text, whole_sentence = false, limit = 10, skip = 0, &block) ⇒ Array
Currently all searches are case insensitive.
Searches against the indexed docs in the DB for the given text. The searched fields are decided by the text index setup against the documents collection. Currently we search against the following fields: “author”, “keywords”, “title” and “text”.
The MongoDB search ranks/sorts the results in order (highest first) based upon each document's textScore, which records the number of text hits. We then store this textScore in each Document object for use elsewhere if needed.
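When whole_sentence is false, each word in the text is searched for separately; when true, the text is searched for as one quoted phrase. The returned documents are ordered with the most relevant first, based upon the textScore of the search.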
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/wgit/database/database.rb', line 132
#
# Runs a MongoDB $text search against the :documents collection and wraps
# each hit in a Wgit::Document.
#
# The caller's string is never modified: stripping and quoting are done on
# a copy, so frozen strings are accepted.
#
# @param text [String] the search text; surrounding whitespace is ignored.
# @param whole_sentence [Boolean] when true the text is wrapped in double
#   quotes before being sent as the $search value.
# @param limit [Integer] forwarded to #retrieve.
# @param skip [Integer] forwarded to #retrieve.
# @yield [doc] each resulting Wgit::Document, if a block is given.
# @return [Array] the Wgit::Document results, or [] when nothing matched.
def search(text, whole_sentence = false, limit = 10, skip = 0, &block)
  search_text = text.strip
  search_text = "\"#{search_text}\"" if whole_sentence

  # The textScore sorts based on the most search hits.
  # We use the textScore hash as a sort and a projection below.
  # :$caseSensitive => case_sensitive, # 3.2+ only.
  sort_proj = { :score => { :$meta => "textScore" } }
  query = { :$text => { :$search => search_text } }

  results = retrieve(:documents, query, sort_proj, sort_proj, limit, skip)
  return [] if results.count < 1

  # results.respond_to? :map! is false so we use map and overwrite the var.
  results = results.map { |mongo_doc| Wgit::Document.new(mongo_doc) }
  return results unless block_given?

  results.each { |doc| block.call(doc) }
end
#search_p(text, whole_sentence = false, limit = 10, skip = 0, sentence_length = 80, &block) ⇒ Object Also known as: search_and_format
Performs a search and pretty prints the results.
151 152 153 154 155 |
# File 'lib/wgit/database/database.rb', line 151
#
# Performs a search and pretty prints the results via
# Wgit::Utils.printf_search_results.
#
# The search runs on a stripped copy of text, so the caller's string is left
# untouched and may be frozen.
#
# @param text [String] the search text.
# @param whole_sentence [Boolean] forwarded to #search.
# @param limit [Integer] forwarded to #search.
# @param skip [Integer] forwarded to #search.
# @param sentence_length [Integer] forwarded to the printer.
# @return whatever Wgit::Utils.printf_search_results returns.
def search_p(text, whole_sentence = false, limit = 10, skip = 0,
             sentence_length = 80, &block)
  # String#strip always returns a new, unfrozen string, so nothing done to
  # it downstream can reach the caller's object.
  query_text = text.strip
  results = search(query_text, whole_sentence, limit, skip, &block)
  Wgit::Utils.printf_search_results(results, query_text, false, sentence_length)
end
#size ⇒ Object Also known as: count, length
162 163 164 |
# File 'lib/wgit/database/database.rb', line 162
#
# Reads the :dataSize entry from the database statistics returned by #stats.
#
# @return the value stored under :dataSize.
def size
  db_stats = stats
  db_stats[:dataSize]
end
#stats ⇒ Object
Returns a Mongo object which can be used like a Hash to retrieve values.
158 159 160 |
# File 'lib/wgit/database/database.rb', line 158
#
# Issues the dbStats command through @@client and returns the first document
# of the reply, which can be used like a Hash to retrieve values.
#
# @return the first document of the dbStats command result.
def stats
  reply = @@client.command(:dbStats => 0)
  reply_docs = reply.documents
  reply_docs[0]
end
#uncrawled_urls(limit = 0, skip = 0, &block) ⇒ Object
107 108 109 |
# File 'lib/wgit/database/database.rb', line 107
#
# Convenience wrapper around #urls that asks only for urls whose crawled
# flag is false.
#
# @param limit [Integer] forwarded to #urls (0 is the default).
# @param skip [Integer] forwarded to #urls.
# @return whatever #urls returns for the same arguments.
def uncrawled_urls(limit = 0, skip = 0, &block)
  crawled_flag = false
  urls(crawled_flag, limit, skip, &block)
end
#update(data) ⇒ Object
Updates the given Url or Document in the database.
168 169 170 171 172 173 174 175 176 |
# File 'lib/wgit/database/database.rb', line 168
#
# Updates a single record, dispatching on its type: a Url goes to
# #update_url and a Document goes to #update_doc.
#
# @param data [Url, Document] the record to update.
# @raise [RuntimeError] if data is neither a Url nor a Document.
def update(data)
  case data
  when Url
    update_url(data)
  when Document
    update_doc(data)
  else
    raise "data is not in the correct format (all Url's or Document's)"
  end
end
#update_doc(doc) ⇒ Object
186 187 188 189 190 191 192 |
# File 'lib/wgit/database/database.rb', line 186
#
# Updates the stored document whose :url equals doc.url. The new field
# values are Wgit::Model.document(doc) merged with
# Wgit::Model.common_update_data, applied with "$set".
#
# @param doc [Document] the document to write back.
# @return whatever #_update returns.
def update_doc(doc)
  assert_type(doc, Document)

  selector = { :url => doc.url }
  fields = Wgit::Model.document(doc).merge(Wgit::Model.common_update_data)
  changes = { "$set" => fields }

  # NOTE(review): the leading `true` presumably selects a single-record
  # update - confirm against #_update.
  _update(true, :documents, selector, changes)
end
#update_url(url) ⇒ Object
178 179 180 181 182 183 184 |
# File 'lib/wgit/database/database.rb', line 178
#
# Updates the stored url record whose :url equals the given url. The new
# field values are Wgit::Model.url(url) merged with
# Wgit::Model.common_update_data, applied with "$set".
#
# @param url [Url] the url to write back.
# @return whatever #_update returns.
def update_url(url)
  assert_type(url, Url)

  selector = { :url => url }
  fields = Wgit::Model.url(url).merge(Wgit::Model.common_update_data)
  changes = { "$set" => fields }

  # NOTE(review): the leading `true` presumably selects a single-record
  # update - confirm against #_update.
  _update(true, :urls, selector, changes)
end
#urls(crawled = nil, limit = 0, skip = 0, &block) ⇒ Object
A crawled parameter value of nil (the default) returns all urls. A limit of 0 means all urls are returned. All urls are sorted by date_added ascending, in other words the first url in the results is the first added.
90 91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/wgit/database/database.rb', line 90
#
# Retrieves urls from the :urls collection, sorted by date_added ascending,
# and wraps each one in a Wgit::Url.
#
# @param crawled [Boolean, nil] nil (the default) applies no filter;
#   otherwise only urls whose :crawled field equals this value are matched.
# @param limit [Integer] forwarded to #retrieve (0 is the default).
# @param skip [Integer] forwarded to #retrieve.
# @yield [url] each resulting Wgit::Url, if a block is given.
# @return [Array] the Wgit::Url results, or [] when nothing matched.
def urls(crawled = nil, limit = 0, skip = 0, &block)
  filter = crawled.nil? ? {} : { :crawled => crawled }
  oldest_first = { :date_added => 1 }

  found = retrieve(:urls, filter, oldest_first, {}, limit, skip)
  return [] if found.count < 1

  # The retrieved result has no map!, so build a new array with map.
  url_objects = found.map { |url_doc| Wgit::Url.new(url_doc) }
  return url_objects unless block_given?

  url_objects.each { |url| block.call(url) }
end