Top Level Namespace

Defined Under Namespace

Classes: Connection, Juxta

Constant Summary collapse

Log_sem =

coordinate access to stdout…

Mutex.new

Instance Method Summary collapse

Instance Method Details

#error_message(message) ⇒ Object



14
15
16
17
18
# File 'lib/juxta/utilities.rb', line 14

# Writes +message+ to stdout as a single error line of the form
# "HH:MM:SS-<thread tag>: ERROR ** <message> **".
#
# The thread tag is the current thread's object id modulo 10000. The line is
# built and printed while holding Log_sem, the mutex that coordinates access
# to stdout.
#
# @param message [#to_s] text to embed in the error line
# @return [nil]
def error_message( message )
  Log_sem.synchronize do
    timestamp = Time.now.strftime("%T")
    thread_tag = Thread.current.object_id % 10000
    puts "#{timestamp}-#{thread_tag}: ERROR ** #{message} **"
  end
end

#get_filelist(dir) ⇒ Object



24
25
26
27
28
29
# File 'lib/juxta/utilities.rb', line 24

# Lists the non-directory entries found directly inside +dir+.
#
# Every entry reported by Dir.entries is prefixed with "<dir>/"; entries that
# are directories (which includes "." and "..") are then dropped. The listing
# is not recursive.
#
# @param dir [String] directory to list
# @return [Array<String>] "<dir>/<entry>" paths of the remaining entries
def get_filelist( dir )
  Dir.entries( dir )
     .map { |entry| "#{dir}/#{entry}" }
     .reject { |path| File.directory?( path ) }
end

#log_message(message) ⇒ Object



8
9
10
11
12
# File 'lib/juxta/utilities.rb', line 8

# Writes +message+ to stdout as a single log line of the form
# "HH:MM:SS-<thread tag>: <message>".
#
# The thread tag is the current thread's object id modulo 10000. The line is
# built and printed while holding Log_sem, the mutex that coordinates access
# to stdout.
#
# @param message [#to_s] text to log
# @return [nil]
def log_message( message )
  Log_sem.synchronize do
    timestamp = Time.now.strftime("%T")
    thread_tag = Thread.current.object_id % 10000
    puts "#{timestamp}-#{thread_tag}: #{message}"
  end
end

#make_guid ⇒ Object



20
21
22
# File 'lib/juxta/utilities.rb', line 20

# Builds an identifier from a randomly created UUID with the hyphens removed.
#
# @return [String] the UUID's string form without any "-" characters
def make_guid
  uuid_text = UUIDTools::UUID.random_create.to_s
  uuid_text.delete("-")
end

#standard_fileset ⇒ Object



31
32
33
34
35
36
37
# File 'lib/juxta/utilities.rb', line 31

# Returns the fixed pair of XML file paths under test/good-testdata.
#
# A new array is built on every call, so callers may modify the result.
#
# @return [Array<String>] the two relative file paths
def standard_fileset
  %w[
    test/good-testdata/MD_AmerCh1b.xml
    test/good-testdata/MD_Brit_v1CH1a.xml
  ]
end

#wikipedia(title, limit = 5, lang = "en") ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/juxta/utilities.rb', line 46

# Fetches the content of up to +limit+ non-minor revisions of a Wikipedia page.
#
# Works in two steps, both through wikipediaQueryRevisions:
# 1. request ids and flags for up to 500 revisions of +title+, drop every
#    entry that has a "minor" key, and keep the first +limit+ revision ids;
# 2. request the content of exactly those revision ids.
#
# If step 1 yields no revision ids, step 2 is skipped and an empty array is
# returned, so no request with an empty revids list is sent.
#
# @param title [String] page title, sent as the API's "titles" parameter
# @param limit [Integer] maximum number of revisions to fetch content for
# @param lang [String] language code handed to wikipediaQueryRevisions
# @return [Array<String>] revision contents (the "*" field), with missing and
#   empty contents removed
def wikipedia( title, limit=5, lang="en" )
  candidates = wikipediaQueryRevisions(lang, {
    :titles => title,
    :prop => "revisions",
    :rvlimit => 500,
    :rvprop => "ids|flags"
  })

  revision_ids = candidates.reject { |rev| rev.key?("minor") }
                           .map { |rev| rev["revid"] }
                           .first(limit)

  # Nothing qualified: a second request with an empty revids value is pointless.
  return [] if revision_ids.empty?

  wikipediaQueryRevisions(lang, {
    :revids => revision_ids.join("|"),
    :prop => "revisions",
    :rvprop => "content"
  }).map { |rev| rev.fetch("*", "") }.reject { |text| text == "" }
end

#wikipediaQueryRevisions(lang, params) ⇒ Object



39
40
41
42
43
44
# File 'lib/juxta/utilities.rb', line 39

# Runs a "query" action against the Wikipedia API of the given language and
# collects the revisions of every page in the response.
#
# The request goes to https://<lang>.wikipedia.org/w/api.php over HTTPS with
# :format => "json" and :action => "query" as defaults; entries in +params+
# are merged on top and therefore win on key clashes.
#
# @param lang [String] language subdomain, e.g. "en"
# @param params [Hash] additional API query parameters
# @return [Array<Hash>] the "revisions" entries of all pages under
#   "query" -> "pages", concatenated; empty when any of those keys is missing
def wikipediaQueryRevisions( lang, params )
  query = { :format => "json", :action => "query" }.merge(params)
  response = RestClient.get("https://#{lang}.wikipedia.org/w/api.php", :params => query)

  JSON.parse(response)
      .fetch("query", {})
      .fetch("pages", {})
      .values
      .flat_map { |page| page.fetch("revisions", []) }
end