Module: Document::Corpus
- Defined in: lib/rbbt/document/corpus.rb, lib/rbbt/document/corpus/pubmed.rb
Constant Summary collapse
- PUBMED_NAMESPACE = "PMID"
Class Attribute Summary collapse
- .claims ⇒ Object
  Returns the value of attribute claims.
Class Method Summary collapse
- .claim(namespace, &block) ⇒ Object
- .setup(corpus) ⇒ Object
Instance Method Summary collapse
- #[](*args) ⇒ Object
- #add_document(document) ⇒ Object
- #add_pmid(pmid, type = nil, update = false) ⇒ Object
- #add_pubmed_query(query, max = 3000, type = nil) ⇒ Object
- #docids(*prefix) ⇒ Object
- #documents(*prefix) ⇒ Object
Class Attribute Details
.claims ⇒ Object
Returns the value of attribute claims.
59 60 61 |
# File 'lib/rbbt/document/corpus.rb', line 59
# Reader for the +claims+ attribute.
#
# @return [Object, nil] the current value of @claims (nil until something
#   assigns it)
def claims
  @claims
end
Class Method Details
.claim(namespace, &block) ⇒ Object
60 61 62 63 |
# File 'lib/rbbt/document/corpus.rb', line 60
# Registers +block+ as the handler for +namespace+ in the claims registry.
#
# The registry is keyed by the string form of the namespace, so symbols and
# strings naming the same namespace share one entry.
#
# @param namespace [#to_s] namespace whose handler is being registered
# @yield handler stored for the namespace
# @return [Proc, nil] the block that was stored
def claim(namespace, &block)
  # Create the registry only once: rebuilding it on every call would throw
  # away the handlers already registered for other namespaces.
  @claims ||= {}
  @claims[namespace.to_s] = block
end
.setup(corpus) ⇒ Object
5 6 7 8 9 10 11 |
# File 'lib/rbbt/document/corpus.rb', line 5
# Turns +corpus+ into a Document::Corpus store.
#
# A String argument is first opened with
# Persist.open_tokyocabinet(corpus, true, :single, "BDB"); any other object
# is used as given. The store is then extended with Document::Corpus and
# Persist::TSVAdapter (each only when not already mixed in), closed, and
# returned.
#
# @param corpus [String, Object] a String handed to Persist.open_tokyocabinet,
#   or an already-open store
# @return [Object] the extended, closed store
def self.setup(corpus)
  store = corpus
  store = Persist.open_tokyocabinet(store, true, :single, "BDB") if String === store

  unless Document::Corpus === store
    store.extend(Document::Corpus)
  end
  unless Persist::TSVAdapter === store
    store.extend(Persist::TSVAdapter)
  end

  store.close
  store
end
Instance Method Details
#[](*args) ⇒ Object
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/rbbt/document/corpus.rb', line 34
# Looks up a document by docid, falling back to a registered claim handler
# when the store has no entry.
#
# The stored value is read through +super+ inside read_and_close and tagged
# with Encoding.default_external. When more than one argument is given the
# raw value is returned as is. Otherwise the docid is split on ":" into
# namespace, id and type; if nothing was stored and a handler is registered
# for the namespace in Document::Corpus.claims, the handler is run with
# instance_exec(id, type). A non-nil result is passed through Document.setup
# before being returned.
#
# @param args [Array] the docid followed by any extra arguments for +super+
# @return [Object, nil] the document, or nil when it is neither stored nor
#   claimed
def [](*args)
  docid = args.first

  res = self.read_and_close do
    super(*args)
  end

  res.force_encoding(Encoding.default_external) if res
  return res if args.length > 1

  namespace, id, type = docid.split(":")

  if res.nil?
    # The registry stays nil until a handler is registered; treat that as
    # "nothing claims this namespace" rather than failing on nil.
    handlers = Document::Corpus.claims
    if handlers && handlers.include?(namespace.to_s)
      res = self.instance_exec(id, type, &handlers[namespace.to_s])
    end
  end

  # A value produced by a handler has not been through the first
  # force_encoding above.
  res.force_encoding(Encoding.default_external) if res
  Document.setup(res, namespace, id, type, self) unless res.nil?

  res
end
#add_document(document) ⇒ Object
13 14 15 16 17 18 19 |
# File 'lib/rbbt/document/corpus.rb', line 13
# Stores +document+ under its docid unless that docid is already present.
#
# @param document [#docid] document to store
# @return [Object] the existing entry when the docid is already present,
#   otherwise the result of the write_and_close block
def add_document(document)
  key = document.docid

  if self.include?(key)
    self[key]
  else
    self.write_and_close { self[key] = document }
  end
end
#add_pmid(pmid, type = nil, update = false) ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/rbbt/document/corpus/pubmed.rb', line 5
# Adds one document per PubMed article to the corpus.
#
# Unless +update+ is given, documents already found under the
# "PMID:<pmid>:<type>" prefix are returned without fetching anything.
# Otherwise the articles are fetched with PubMed.get_article and one document
# is built per article from its abstract, title or full text, stored with
# add_document, and the list is returned through Document.setup.
#
# @param pmid [Object, Array] one PubMed id or an Array of them
# @param type [Symbol, String, nil] :abstract (the default for nil or an
#   empty string), :title, or anything else for full text
# @param update [Boolean] anything other than +false+ skips the lookup of
#   already-stored documents
# @return [Object] the stored documents on a hit, otherwise the result of
#   Document.setup on the newly built documents
# @raise [RuntimeError] when full text is requested for an article that has
#   none
def add_pmid(pmid, type = nil, update = false)
  type = :abstract if type.nil?

  if update == false
    # NOTE(review): an Array +pmid+ is stringified with Array#to_s here, so
    # this lookup presumably never matches for multi-id calls - confirm.
    id = [PUBMED_NAMESPACE, pmid, type].map(&:to_s).join(":")
    documents = self.documents(id)
    return documents if documents.any?
  end

  pmids = Array === pmid ? pmid : [pmid]

  # An empty type string means "no type given"; use the same default as nil
  # instead of resetting it to nil and failing on nil.to_sym below.
  type = :abstract if String === type && type.empty?
  kind = type.to_sym

  res = PubMed.get_article(pmids).collect do |article_pmid, article|
    document =
      if kind == :abstract
        Document.setup(article.abstract || "", PUBMED_NAMESPACE, article_pmid, :abstract, self, :corpus => self)
      elsif kind == :title
        Document.setup(article.title, PUBMED_NAMESPACE, article_pmid, :title, self)
      else
        raise "No FullText available for #{ article_pmid }" if article.full_text.nil?
        Document.setup(article.full_text, PUBMED_NAMESPACE, article_pmid, :fulltext, self, :corpus => self)
      end

    Log.debug "Loading pmid #{article_pmid}"
    add_document(document)
    document
  end

  Document.setup(res)
end
#add_pubmed_query(query, max = 3000, type = nil) ⇒ Object
33 34 35 36 |
# File 'lib/rbbt/document/corpus/pubmed.rb', line 33
# Runs a PubMed query and adds the resulting ids to the corpus.
#
# @param query [Object] query handed to PubMed.query
# @param max [Integer] second argument to PubMed.query
# @param type [Symbol, String, nil] document type forwarded to #add_pmid
# @return [Object] whatever #add_pmid returns
def add_pubmed_query(query, max = 3000, type = nil)
  add_pmid(PubMed.query(query, max), type)
end
#docids(*prefix) ⇒ Object
21 22 23 24 25 26 27 28 |
# File 'lib/rbbt/document/corpus.rb', line 21
# Lists the docids stored under a prefix, or every docid for :all.
#
# The prefix parts are joined with ":" and a trailing ":" is appended when
# missing, then matching keys are read with +prefix+ inside read_and_close.
# Passing :all (or "all") as the only part returns +keys+ instead.
#
# @param prefix [Array] prefix components, or :all alone for every key
# @return [Object] the result of DocID.setup on the matching keys
def docids(*prefix)
  joined = prefix * ":"

  # The parts are already joined into a String here, so the "all" request
  # has to be recognised by its string form; a Symbol comparison can never
  # match.
  everything = joined == "all"
  joined += ":" unless everything || joined[-1] == ":"

  ids = self.read_and_close do
    everything ? self.keys : self.prefix(joined)
  end

  DocID.setup(ids, :corpus => self)
end
#documents(*prefix) ⇒ Object
30 31 32 |
# File 'lib/rbbt/document/corpus.rb', line 30
# Resolves the documents stored under the given docid prefix.
#
# @param prefix [Array] prefix components, forwarded unchanged to #docids
# @return [Object] whatever +document+ returns on the #docids result
def documents(*prefix)
  matching_ids = self.docids(*prefix)
  matching_ids.document
end