GrnMini
An easy-to-use wrapper for Groonga (Rroonga). It is a KVS that is very simple to use.
Installation
$ gem install grn_mini
Basic Usage
Create a database with the name "test.db".
require 'grn_mini'
array = GrnMini::Array.new("test.db")
Add a record with a number column and a text column.
The column types are determined the first time you call "GrnMini::Array#add". (An inverted index is added if the data type is "string".)
array.add(text: "aaa", number: 1)
It is also possible to use the '<<' operator.
array << {text: "bbb", number: 2}
array << {text: "ccc", number: 3}
array.size #=> 3
Create a temporary database. (Useful for testing)
GrnMini::Array.tmpdb do |array|
array << {text: "aaa", number: 1}
array << {text: "bbb", number: 2}
array << {text: "ccc", number: 3}
end
# The temporary database is deleted at this point
Data Type
GrnMini::Array.tmpdb do |array|
array << {filename: "a.txt", int: 1, float: 1.5, time: Time.at(1999)}
array << {filename: "b.doc", int: 2, float: 2.5, time: Time.at(2000)}
# ShortText
array[1].filename #=> "a.txt"
array[2].filename #=> "b.doc"
# Int32
array[1].int #=> 1
array[2].int #=> 2
# Float
array[1].float #=> 1.5
array[2].float #=> 2.5
# Time
array[1].time #=> Time.at(1999) (1999 seconds after the Unix epoch)
array[2].time #=> Time.at(2000) (2000 seconds after the Unix epoch)
end
See also 8.4. Data type — Groonga documentation.
Access Record
Add
require 'grn_mini'
array = GrnMini::Array.new("test2.db")
array << {name:"Tanaka", age: 11, height: 162.5}
array << {name:"Suzuki", age: 31, height: 170.0}
Read
record = array[1] # Read from id (> 0)
record.id #=> 1
Each column can be accessed through a method with the same name as the column.
record.name #=> "Tanaka"
record.age #=> 11
record.height #=> 162.5
Groonga::Record#attributes is useful for debugging.
record.attributes #=> {"_id"=>1, "age"=>11, "height"=>162.5, "name"=>"Tanaka"}
Update
array[2].name = "Hayashi"
array[2].attributes #=> {"_id"=>2, "age"=>31, "height"=>170.0, "name"=>"Hayashi"}
Delete
Delete by passing id.
array.delete(1)
# Accessing a deleted record returns empty values (0, 0.0 or nil)
array[1].attributes #=> {"_id"=>1, "age"=>0, "height"=>0.0, "name"=>nil}
# Deleted records are not visible when accessed through Enumerable methods
array.first.id #=> 2
array.first.attributes #=> {"_id"=>2, "age"=>31, "height"=>170.0, "name"=>"Hayashi"}
It is also possible to pass a block.
GrnMini::Array.tmpdb do |array|
array << {name:"Tanaka", age: 11, height: 162.5}
array << {name:"Suzuki", age: 31, height: 170.0}
array << {name:"Hayashi", age: 20, height: 165.0}
array.delete do |record|
record.age <= 20
end
array.size #=> 1
array.first.attributes #=> {"_id"=>2, "age"=>31, "height"=>170.0, "name"=>"Suzuki"}
end
Search
Use GrnMini::Array#select method.
The :text column is implicitly used as the :default_column.
GrnMini::Array.tmpdb do |array|
array << {text:"aaa", number:1}
array << {text:"bbb", number:20}
array << {text:"bbb ccc", number:2}
array << {text:"bbb", number:15}
array << {text:"ccc", number:3}
results = array.select("aaa")
results.map {|record| record.attributes} #=> [{"_id"=>1, "_key"=>{"_id"=>1, "number"=>1, "text"=>"aaa"}, "_score"=>1}]
# AND
results = array.select("bbb ccc")
results.map {|record| record.attributes} #=> [{"_id"=>2, "_key"=>{"_id"=>3, "number"=>2, "text"=>"bbb ccc"}, "_score"=>2}]
# Specify column
results = array.select("bbb number:<10")
results.map {|record| record.attributes} #=> [{"_id"=>2, "_key"=>{"_id"=>3, "number"=>2, "text"=>"bbb ccc"}, "_score"=>2}]
# AND, OR, Grouping
results = array.select("bbb (number:<= 10 OR number:>=20)")
results.map {|record| record.attributes} #=> [{"_id"=>2, "_key"=>{"_id"=>3, "number"=>2, "text"=>"bbb ccc"}, "_score"=>2}, {"_id"=>4, "_key"=>{"_id"=>2, "number"=>20, "text"=>"bbb"}, "_score"=>2}]
# NOT
results = array.select("bbb - ccc")
results.map {|record| record.attributes} #=> [{"_id"=>1, "_key"=>{"_id"=>2, "number"=>20, "text"=>"bbb"}, "_score"=>1}, {"_id"=>3, "_key"=>{"_id"=>4, "number"=>15, "text"=>"bbb"}, "_score"=>1}]
end
Change the :default_column to the :filename column.
GrnMini::Array.tmpdb do |array|
array << {text: "txt", filename:"a.txt"}
array << {text: "txt", filename:"a.doc"}
array << {text: "txt", filename:"a.rb"}
# Specify column
results = array.select("filename:@txt")
results.first.attributes #=> {"_id"=>1, "_key"=>{"_id"=>1, "filename"=>"a.txt", "text"=>"txt"}, "_score"=>1}
# Change default_column
results = array.select("txt", default_column: "filename")
results.first.attributes #=> {"_id"=>1, "_key"=>{"_id"=>1, "filename"=>"a.txt", "text"=>"txt"}, "_score"=>1}
end
See also 8.10.1. Query syntax, Groonga::Table#select
Sort
Specify the column names to sort by.
GrnMini::Array.tmpdb do |array|
array << {name:"Tanaka", age: 11, height: 162.5}
array << {name:"Suzuki", age: 31, height: 170.0}
array << {name:"Hayashi", age: 21, height: 175.4}
array << {name:"Suzuki", age: 5, height: 110.0}
sorted = array.sort(["age"])
sorted.map {|r| {name: r.name, age: r.age}}
#=> [{:name=>"Suzuki", :age=> 5},
# {:name=>"Tanaka", :age=>11},
# {:name=>"Hayashi", :age=>21},
# {:name=>"Suzuki", :age=>31}]
end
Sort by a combination of columns.
sorted = array.sort([{key: "name", order: :ascending},
{key: "age" , order: :descending}])
sorted.map {|r| {name: r.name, age: r.age}}
#=> [{:name=>"Hayashi", :age=>21},
# {:name=>"Suzuki", :age=>31},
# {:name=>"Suzuki", :age=> 5},
# {:name=>"Tanaka", :age=>11}]
Grouping
Also known as drill down.
GrnMini::Array.tmpdb do |array|
array << {text:"aaaa.txt", suffix:"txt", type:1}
array << {text:"aaaa.doc", suffix:"doc", type:2}
array << {text:"aabb.txt", suffix:"txt", type:2}
groups = GrnMini::Util::group_with_sort(array, "suffix")
groups.size #=> 2
[groups[0].key, groups[0].n_sub_records] #=> ["txt", 2]
[groups[1].key, groups[1].n_sub_records] #=> ["doc", 1]
end
Grouping from selection results.
GrnMini::Array.tmpdb do |array|
array << {text:"aaaa", suffix:"txt"}
array << {text:"aaaa", suffix:"doc"}
array << {text:"aaaa", suffix:"txt"}
array << {text:"cccc", suffix:"txt"}
results = array.select("aa")
groups = GrnMini::Util::group_with_sort(results, "suffix")
groups.size #=> 2
[groups[0].key, groups[0].n_sub_records] #=> ["txt", 2]
[groups[1].key, groups[1].n_sub_records] #=> ["doc", 1]
end
Snippet
Displays the text surrounding the matched keywords. It is often used in search engines.
Use GrnMini::Util::text_snippet_from_selection_results.
GrnMini::Array.tmpdb do |array|
array << {text: "[1] This is a pen pep pea pek pet.\n------------------------------\n------------------------------\n------------------------------\n------------------------------\n[2] This is a pen pep pea pek pet.\n------------------------------\n------------------------------\n------------------------------\n------------------------------\n", filename: "aaa.txt"}
results = array.select("This pen")
snippet = GrnMini::Util::text_snippet_from_selection_results(results)
record = results.first
segments = snippet.execute(record.text)
segments.size #=> 2
segments[0] #=> "[1] <<This>> is a <<pen>> pep pea pek pet.\n------------------------------\n------------------------------\n---"
segments[1] #=> "--------\n------------------------------\n[2] <<This>> is a <<pen>> pep pea pek pet.\n-------------------------"
end
GrnMini::Util::html_snippet_from_selection_results returns HTML-escaped snippets.
GrnMini::Array.tmpdb do |array|
array << {text: "<html>\n <div>This is a pen pep pea pek pet.</div>\n</html>\n", filename: "aaa.txt"}
results = array.select("This pen")
snippet = GrnMini::Util::html_snippet_from_selection_results(results, '<span class="strong">', '</span>') # Default value is '<strong>', '</strong>'
record = results.first
segments = snippet.execute(record.text)
segments.size #=> 1
segments.first #=> "&lt;html&gt;\n &lt;div&gt;<span class=\"strong\">This</span> is a <span class=\"strong\">pen</span> pep pea pek pet.&lt;/div&gt;\n&lt;/html&gt;\n"
end
See also Groonga::Expression#snippet
Pagination
#paginate is more convenient than #sort when you need pagination.
GrnMini::Array.tmpdb do |array|
array << {text: "aaaa", filename: "1.txt"}
array << {text: "aaaa aaaa", filename: "2a.txt"}
array << {text: "aaaa aaaa aaaa", filename: "3.txt"}
array << {text: "aaaa aaaa", filename: "2b.txt"}
array << {text: "aaaa aaaa", filename: "2c.txt"}
array << {text: "aaaa aaaa", filename: "2d.txt"}
array << {text: "aaaa aaaa", filename: "2e.txt"}
array << {text: "aaaa aaaa", filename: "2f.txt"}
results = array.select("aaaa")
# -- page1 --
page_entries = results.paginate([["_score", :desc]], :page => 1, :size => 5)
# Total number of records
page_entries.n_records #=> 8
# Page offset
page_entries.start_offset #=> 1
page_entries.end_offset #=> 5
# Page entries
page_entries.size #=> 5
# -- page2 --
page_entries = results.paginate([["_score", :desc]], :page => 2, :size => 5)
# Sample page content display
puts "#{page_entries.n_records} hit. (#{page_entries.start_offset} - #{page_entries.end_offset})"
page_entries.each do |record|
puts "#{record.filename}: #{record.text}"
end
#=> 8 hit. (6 - 8)
# 2b.txt: aaaa aaaa
# 2f.txt: aaaa aaaa
# 1.txt: aaaa
end
See also Groonga::Table#paginate