Class: PubliSci::Dataset

Inherits:
Object
  • Object
show all
Extended by:
Interactive, Registry
Defined in:
lib/bio-publisci/dataset/dataset.rb,
lib/bio-publisci/dsl/dataset_dsl.rb,
lib/bio-publisci/dataset/data_cube.rb,
lib/bio-publisci/dataset/dataset_for.rb,
lib/bio-publisci/dataset/configuration.rb

Defined Under Namespace

Modules: DSL, DataCube Classes: Configuration

Class Method Summary collapse

Methods included from Interactive

interact

Methods included from Registry

register, registry, symbol_for

Class Method Details

.configuration ⇒ Object



6
7
8
# File 'lib/bio-publisci/dataset/dataset.rb', line 6

# Returns the shared Dataset::Configuration, building it on first use and
# handing back the same instance on every later call.
#
# @return [Dataset::Configuration] the memoized configuration object
def self.configuration
  return @config if @config

  @config = Dataset::Configuration.new
end

.download(uri) ⇒ Object



41
42
43
44
45
46
# File 'lib/bio-publisci/dataset/dataset_for.rb', line 41

# Fetches +uri+ into a temporary file and returns the (closed) Tempfile.
#
# The temp file keeps the remote file's extension (e.g. ".RData", ".csv") so
# callers can still infer the file type from +File.extname(result.path)+;
# Tempfile places its random part between the given prefix and suffix.
#
# @param uri [String] location to read; whatever +open+ / +URI.open+ accepts
# @return [Tempfile] closed temp file holding the fetched bytes. The caller
#   must keep a reference to it for as long as the path is in use, because
#   Tempfile removes the file when the object is garbage collected.
# @raise [StandardError] any error from opening or reading +uri+ is re-raised
#   after the temp file has been removed
def self.download(uri)
  # Drop any query string / fragment so it does not leak into the file name.
  remote_name = uri.split('/').last.sub(/[?#].*\z/, '')
  extension = File.extname(remote_name)

  out = Tempfile.new([File.basename(remote_name, extension), extension])
  out.binmode # payloads such as .RData are binary; avoid newline translation

  # Kernel#open stopped handling URLs in Ruby 3.0; URI.open (provided by
  # open-uri on Ruby >= 2.5) is the supported entry point. Older setups fall
  # back to Kernel#open exactly as before. The block form closes the handle.
  # NOTE(review): both forms fall through to Kernel#open for non-URL strings,
  # which spawns a process for input starting with "|" -- do not pass
  # untrusted input here.
  data = if defined?(URI) && URI.respond_to?(:open)
           URI.open(uri, 'rb', &:read)
         else
           open(uri, 'rb', &:read)
         end

  out.write data
  out.close
  out
rescue StandardError
  # Do not leave a half-written temp file behind when the fetch fails.
  out.close! if out
  raise
end

.for(object, options = {}, ask_on_ambiguous = true) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/bio-publisci/dataset/dataset_for.rb', line 6

# Picks a reader for +object+ and runs it.
#
# Supported inputs, as dispatched below:
# * path of an existing file ending in ".RData" (or a file named exactly
#   ".RData")                       -> r_object
# * path of an existing ".csv" file -> PubliSci::Reader::CSV#automatic
# * an http:// or https:// URL      -> fetched with download, then
#   dispatched again on the temp file's path
# * an Rserve::REXP                 -> r_object
#
# @param object [String, Rserve::REXP] file path, URL or R expression
# @param options [Hash, Boolean] reader options; a bare true/false here is
#   taken as +ask_on_ambiguous+ (so +Dataset.for(obj, false)+ works)
# @param ask_on_ambiguous [Boolean] forwarded to the selected reader
# @return [Object] whatever the selected reader returns
# @raise [RuntimeError] if no reader is available for +object+
def self.for(object, options={}, ask_on_ambiguous=true)
  if options == false || options == true
    ask_on_ambiguous = options
    options = {}
  end

  case object
  when String
    if File.exist? object
      extension = File.extname(object)
      if extension.empty?
        # A dotfile such as ".RData" has no extname; use its whole name.
        file_name = File.basename(object)
        unless file_name.start_with?('.') && file_name.count('.') == 1
          raise "Can't load file #{object}; type inference not yet implemented"
        end
        extension = file_name
      end

      case extension
      when ".RData"
        r_object(object, options, ask_on_ambiguous)
      when /\A\.csv/i
        # Anchored at the start with an escaped dot so e.g. ".tcsv" no longer
        # matches. Deliberately a prefix match: a temp file created with
        # Tempfile.new("name.csv") carries a random suffix after ".csv".
        PubliSci::Reader::CSV.new.automatic(object,nil,options,ask_on_ambiguous)
      else
        # Previously fell through and returned nil without any hint.
        raise "Unable to find reader for File or String #{object}"
      end
    elsif object =~ %r{\Ahttps?://.+}
      # Keep the Tempfile referenced until the reader is done; once the object
      # is garbage collected the underlying file is deleted.
      fetched = download(object)
      self.for(fetched.path, options, ask_on_ambiguous)
    else
      raise "Unable to find reader for File or String #{object}"
    end
  when Rserve::REXP
    r_object(object, options, ask_on_ambiguous)
  else
    raise "not recognize Ruby objects of this type yet (#{object})"
  end
end

.r_object(object, options = {}, ask_on_ambiguous = true) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/bio-publisci/dataset/dataset_for.rb', line 48

# Generates output for an R object through the matching PubliSci::Reader.
#
# A String is treated as the path of an R data file that is loaded through a
# new Rserve connection; an Rserve::REXP is handed to the Dataframe reader.
# When +ask_on_ambiguous+ is true, missing choices (variable, dimensions,
# measures, labels, output base) are requested interactively.
#
# @param object [String, Rserve::REXP] path to an R data file or an R value
# @param options [Hash] reader options; +:dimensions+ / +:measures+ are
#   filled in here from the interactive answers (the hash is mutated)
# @param ask_on_ambiguous [Boolean] whether to prompt for missing choices
# @return [Object] the result of the reader's +generate_n3+
# @raise [RuntimeError] if +object+ is neither a String nor an Rserve::REXP,
#   or no reader exists for the R class
def self.r_object(object, options={}, ask_on_ambiguous=true)
  case object
  when String
    r_object_from_file(object, options, ask_on_ambiguous)
  when Rserve::REXP
    r_object_from_rexp(object, options, ask_on_ambiguous)
  else
    raise "#{object} is not an R object"
  end
end

# Loads +path+ into R, picks one of the loaded variables and dispatches on
# its R class ("data.frame", "cross" or "matrix").
#
# @api private
def self.r_object_from_file(path, options, ask_on_ambiguous)
  con = Rserve::Connection.new

  # Escape backslashes and single quotes so the path cannot terminate the
  # single-quoted R string literal it is embedded in.
  r_path = File.absolute_path(path).gsub(/['\\]/) { |ch| "\\#{ch}" }

  # NOTE(review): to_ruby may hand back a bare String rather than an Array
  # for a length-1 R vector -- confirm against rserve-client. Array() makes
  # both shapes behave the same below.
  loaded = Array(con.eval("load('#{r_path}')").to_ruby)
  if loaded.size > 1 && ask_on_ambiguous
    puts "Which variable? #{loaded}"
    var = loaded[gets.to_i]
  else
    var = loaded[0]
  end
  raise "no R variable selected from #{path}" if var.nil?

  r_classes = Array(con.eval("class(#{var})").to_ruby)

  if r_classes.include? "data.frame"
    df = PubliSci::Reader::Dataframe.new

    if ask_on_ambiguous && !(options[:dimensions] && options[:measures])
      # Column names feed both prompts; ask R for them only once.
      columns = Array(con.eval("names(#{var})").to_ruby)
      unless options[:dimensions]
        picked = ask_indexed_selection("Which dimensions? #{columns}", columns)
        options[:dimensions] = picked if picked
      end
      unless options[:measures]
        picked = ask_indexed_selection("Which measures? #{columns} ", columns)
        options[:measures] = picked if picked
      end
    end

    df.generate_n3(con.eval(var),var,options)

  elsif r_classes.include? "cross"
    bc = PubliSci::Reader::RCross.new

    if ask_on_ambiguous && !options[:measures]
      pheno_names = Array(con.eval("names(#{var}$pheno)").to_ruby)
      picked = ask_indexed_selection("Which phenotype traits? #{pheno_names}", pheno_names)
      options[:measures] = picked if picked
    end

    base = ask_on_ambiguous ? ask_with_default("Output file base?", var) : var
    bc.generate_n3(con, var, base, options)

  elsif r_classes.include? "matrix"
    mat = PubliSci::Reader::RMatrix.new

    if ask_on_ambiguous && !options[:measures]
      rows = ask_with_default("Row label", "row")
      cols = ask_with_default("Column label", "column")
      vals = ask_with_default("Entry label", "value")
      options[:measures] = [cols,rows,vals]
    end

    base = ask_on_ambiguous ? ask_with_default("Output file base?", var) : var
    mat.generate_n3(con, var, base, options)

  else
    raise "no PubliSci::Reader found for #{r_classes}"
  end
end

# Runs the Dataframe reader on an in-memory Rserve::REXP, asking for the
# dataset name, dimensions and measures through +interact+ when allowed.
#
# @api private
def self.r_object_from_rexp(rexp, options, ask_on_ambiguous)
  # NOTE(review): only the truthiness of the first class entry is checked;
  # presumably this was meant to test for "data.frame" -- confirm.
  unless rexp.attr.payload["class"].payload.first
    raise "support for other Rserve objects coming shortly"
  end

  df = PubliSci::Reader::Dataframe.new
  var = ask_on_ambiguous ? interact("Dataset name?",nil) : nil

  if ask_on_ambiguous
    unless options[:dimensions]
      dims = rexp.payload.names
      selection = interact("Which dimensions?","row",dims){|s| puts s; nil}
      options[:dimensions] = selection if selection
    end

    unless options[:measures]
      meas = rexp.payload.names
      options[:measures] = interact("Which measures?",meas,meas)
    end
  end

  df.generate_n3(rexp,var,options)
end

# Prints +message+, reads a comma-separated list of indices and maps each
# index to the entry of +candidates+ at that position.
#
# @return [Array, nil] the chosen entries, or nil when the answer is empty
#   (including end of input)
# @api private
def self.ask_indexed_selection(message, candidates)
  puts message
  answer = gets.to_s.chomp # gets returns nil at end of input
  return nil if answer.empty?

  answer.split(',').map { |index| candidates[index.to_i] }
end

# Prints +message+ and returns the entered line, or +default+ when the line
# is empty (including end of input).
#
# @api private
def self.ask_with_default(message, default)
  puts message
  answer = gets.to_s.chomp
  answer.empty? ? default : answer
end