Module: Document::Corpus

Defined in:
lib/rbbt/document/corpus.rb,
lib/rbbt/document/corpus/pubmed.rb

Class Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Class Attribute Details

.claims ⇒ Object

Returns the value of attribute claims.



54
55
56
# File 'lib/rbbt/document/corpus.rb', line 54

# Reader for the @claims registry that `claim` writes to.
# Yields nil while nothing has been assigned to @claims.
def claims; @claims; end

Class Method Details

.claim(namespace, &block) ⇒ Object



55
56
57
58
# File 'lib/rbbt/document/corpus.rb', line 55

# Registers +block+ as the handler for +namespace+.
#
# The handler is stored under the string form of +namespace+. The registry
# is created lazily and kept between calls, so registering a second
# namespace does not discard handlers registered earlier.
#
# @param namespace [#to_s] namespace the handler is responsible for
# @param block [Proc] handler to store for that namespace
# @return [Proc] the stored block
def claim(namespace, &block)
  # `||=` rather than `=`: a plain assignment would wipe every earlier claim.
  @claims ||= {}
  @claims[namespace.to_s] = block
end

.setup(corpus) ⇒ Object



5
6
7
8
9
# File 'lib/rbbt/document/corpus.rb', line 5

# Extends +corpus+ with Document::Corpus and Persist::TSVAdapter, skipping
# whichever module it already has, and hands the same object back.
#
# @param corpus [Object] object to turn into a corpus
# @return [Object] the +corpus+ argument itself
def self.setup(corpus)
  corpus.tap do |target|
    target.extend(Document::Corpus) unless Document::Corpus === target
    target.extend(Persist::TSVAdapter) unless Persist::TSVAdapter === target
  end
end

Instance Method Details

#[](*args) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/rbbt/document/corpus.rb', line 31

# Looks up a document, falling back to a registered claim handler.
#
# The stored value is read through +super+ inside +read_and_close+. When
# more than one argument is given the raw stored value is returned as is.
# Otherwise the first argument is split on ":" into namespace, id and type;
# if nothing is stored and Document::Corpus.claims has a handler for the
# namespace, the handler is run with +instance_exec+ on this object and
# receives (id, type). A non-nil result is passed to Document.setup together
# with namespace, id, type and this object.
#
# @param args [Array] the docid, optionally followed by extra arguments
#   that are forwarded to +super+
# @return [Object, nil] the stored or claimed document, or nil if neither
#   exists
def [](*args)
  docid = args.first

  res = self.read_and_close do
    super(*args)
  end

  return res if args.length > 1

  namespace, id, type = docid.split(":")

  if res.nil?
    # The registry stays nil until the first claim is registered; treat that
    # the same as "no handler" instead of failing on nil.
    claims = Document::Corpus.claims
    if claims && claims.include?(namespace.to_s)
      res = self.instance_exec(id, type, &claims[namespace.to_s])
    end
  end

  Document.setup(res, namespace, id, type, self) unless res.nil?

  res
end

#add_document(document) ⇒ Object



11
12
13
14
15
16
17
# File 'lib/rbbt/document/corpus.rb', line 11

# Stores +document+ under its docid unless that key is already present.
#
# @param document [#docid] document to store
# @return [Object] +document+ when the key already exists, otherwise
#   whatever +write_and_close+ returns for the assignment block
def add_document(document)
  key = document.docid

  if self.include?(key)
    document
  else
    self.write_and_close { self[key] = document }
  end
end

#add_pmid(pmid, type = nil) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/rbbt/document/corpus/pubmed.rb', line 4

# Fetches one or more PubMed articles and adds a document for each.
#
# +type+ selects which part of the article becomes the document text:
# nil, an empty string or "abstract" use the abstract (empty string when
# the article has none), "title" uses the title, and anything else uses
# the full text.
#
# @param pmid [Object, Array] a single PMID or an array of PMIDs
# @param type [String, Symbol, nil] part of the article to store
# @return [Object] result of Document.setup on the collected
#   +add_document+ results
# @raise [RuntimeError] when full text is requested but not available
def add_pmid(pmid, type = nil)
  pmids = Array === pmid ? pmid : [pmid]
  type = nil if String === type && type.empty?

  added = PubMed.get_article(pmids).collect do |pmid, article|
    part = type.nil? ? :abstract : type.to_sym

    document =
      case part
      when :abstract
        Document.setup(article.abstract || "", "PMID", pmid, :abstract, self, :corpus => self)
      when :title
        Document.setup(article.title, :PMID, pmid, :title, self)
      else
        raise "No FullText available for #{ pmid }" if article.full_text.nil?
        Document.setup(article.full_text, :PMID, pmid, :fulltext, self, :corpus => self)
      end

    Log.debug "Loading pmid #{pmid}"
    add_document(document)
  end

  Document.setup(added)
end

#add_pubmed_query(query, max = 3000, type = nil) ⇒ Object



24
25
26
27
# File 'lib/rbbt/document/corpus/pubmed.rb', line 24

# Runs a PubMed query and adds the matching articles through +add_pmid+.
#
# @param query [Object] query passed to PubMed.query
# @param max [Integer] second argument passed to PubMed.query
# @param type [String, Symbol, nil] forwarded to +add_pmid+
# @return [Object] whatever +add_pmid+ returns
def add_pubmed_query(query, max = 3000, type = nil)
  add_pmid(PubMed.query(query, max), type)
end

#docids(prefix) ⇒ Object



19
20
21
22
23
24
25
# File 'lib/rbbt/document/corpus.rb', line 19

# Collects the keys returned by +self.prefix+ for the given prefix.
#
# A trailing ":" is appended to the prefix when it does not already end
# with one. The lookup runs inside +read_and_close+ and the result is
# passed to DocID.setup with this object as the :corpus option.
#
# @param prefix [String] key prefix, with or without the trailing ":"
# @return [Object] result of DocID.setup
def docids(prefix)
  key_prefix = prefix.end_with?(":") ? prefix : prefix + ":"
  matches = self.read_and_close { self.prefix(key_prefix) }
  DocID.setup(matches, :corpus => self)
end

#documents(prefix) ⇒ Object



27
28
29
# File 'lib/rbbt/document/corpus.rb', line 27

# Resolves the docids found for +prefix+ by calling +document+ on them.
#
# @param prefix [String] key prefix handed to +docids+
# @return [Object] whatever +document+ returns on the docid collection
def documents(prefix)
  matching_ids = self.docids(prefix)
  matching_ids.document
end