Module: Hdfs

Includes:
Java
Defined in:
lib/hdfs_jruby.rb,
lib/hdfs_jruby/file.rb,
lib/hdfs_jruby/version.rb

Defined Under Namespace

Classes: Configuration, File, FileSystem, FsPermission, Path

Constant Summary collapse

JAR_PATTERN_0_20 =
"hadoop-core-*.jar"
HADOOP_HOME =
VERSION =
"0.0.9"

Class Method Summary collapse

Class Method Details

._conv(stat) ⇒ Object



249
250
251
252
253
254
255
256
257
258
259
# File 'lib/hdfs_jruby.rb', line 249

# Converts a file-status object into a plain Hash of string keys.
#
# The keys are, in order: 'path', 'length', 'modificationTime', 'owner',
# 'group', 'permission' and 'type'. 'type' is 'DIRECTORY' when stat.isDir
# is truthy and 'FILE' otherwise.
#
# @param stat [Object] object responding to getPath, getLen,
#   getModificationTime, getOwner, getGroup, getPermission (whose result
#   responds to toShort) and isDir
#   (presumably a Hadoop FileStatus — confirm against callers)
# @return [Hash] the converted file information
def _conv(stat)
  {
    'path' => stat.getPath.to_s,
    'length' => stat.getLen.to_i,
    'modificationTime' => stat.getModificationTime.to_i,
    'owner' => stat.getOwner.to_s,
    'group' => stat.getGroup.to_s,
    'permission' => stat.getPermission.toShort.to_i,
    'type' => stat.isDir ? 'DIRECTORY' : 'FILE'
  }
end

._path(path) ⇒ Object



241
242
243
244
245
246
# File 'lib/hdfs_jruby.rb', line 241

# Wraps a path value in a Path object, rejecting nil up front.
#
# @param path [Object] value passed straight to Path.new
# @return [Path] a new Path built from +path+
# @raise [RuntimeError] with message "path is nil" when +path+ is nil
def _path(path)
  raise "path is nil" if path.nil?

  Path.new(path)
end

.connectAsUser(user) ⇒ Object



54
55
56
57
58
# File 'lib/hdfs_jruby.rb', line 54

# Replaces the filesystem handle in @fs with one obtained for +user+.
#
# The default URI is looked up from @conf first; any existing non-nil @fs
# is then closed before the new handle is stored.
#
# @param user [Object] user argument forwarded to Hdfs::FileSystem.get
#   (presumably a user-name String — confirm)
# @return [Object] the new value of @fs
def connectAsUser(user)
  default_uri = Hdfs::FileSystem.getDefaultUri(@conf)
  @fs.close unless @fs.nil?
  @fs = Hdfs::FileSystem.get(default_uri, @conf, user)
end

.delete(path, r = false) ⇒ Object

delete

Parameters:

  • path (String)
  • r (Boolean) (defaults to: false)

    recursive false or true (default: false)



146
147
148
# File 'lib/hdfs_jruby.rb', line 146

# Deletes +path+ through the filesystem handle in @fs.
#
# @param path [String] path to delete; wrapped with _path (raises on nil)
# @param r [Boolean] recursive flag forwarded to @fs.delete (default: false)
# @return [Object] whatever @fs.delete returns
def delete(path, r = false)
  target = _path(path)
  @fs.delete(target, r)
end

.directory?(path) ⇒ Boolean

Returns true: directory, false: file.

Returns:

  • (Boolean)

    true: directory, false: file



156
157
158
# File 'lib/hdfs_jruby.rb', line 156

# Asks the filesystem handle in @fs whether +path+ is a directory.
#
# @param path [String] path to test; wrapped with _path (raises on nil)
# @return [Boolean] the result of @fs.isDirectory
def directory?(path)
  target = _path(path)
  @fs.isDirectory(target)
end

.exists?(path) ⇒ Boolean

Parameters:

  • path (String)

Returns:

  • (Boolean)


132
133
134
# File 'lib/hdfs_jruby.rb', line 132

# Asks the filesystem handle in @fs whether +path+ exists.
#
# @param path [String] path to test; wrapped with _path (raises on nil)
# @return [Boolean] the result of @fs.exists
def exists?(path)
  target = _path(path)
  @fs.exists(target)
end

.file?(path) ⇒ Boolean

Returns true: file, false: directory.

Returns:

  • (Boolean)

    true: file, false: directory



151
152
153
# File 'lib/hdfs_jruby.rb', line 151

# Asks the filesystem handle in @fs whether +path+ is a regular file.
#
# @param path [String] path to test; wrapped with _path (raises on nil)
# @return [Boolean] the result of @fs.isFile
def file?(path)
  target = _path(path)
  @fs.isFile(target)
end

.get(remote, local) ⇒ Object

get file or directory from hdfs

Parameters:

  • remote (String)

    source (hdfs path)

  • local (String)

    destination (local path)



181
182
183
# File 'lib/hdfs_jruby.rb', line 181

# Copies +remote+ to +local+ via @fs.copyToLocalFile.
#
# Both arguments are wrapped with Path.new directly (no nil check here).
#
# @param remote [String] source (hdfs path)
# @param local [String] destination (local path)
# @return [Object] whatever @fs.copyToLocalFile returns
def get(remote, local)
  source = Path.new(remote)
  destination = Path.new(local)
  @fs.copyToLocalFile(source, destination)
end

.get_fs ⇒ Object



215
216
217
# File 'lib/hdfs_jruby.rb', line 215

# Returns the current value of @fs without modifying it.
#
# NOTE(review): @fs is presumably the filesystem handle assigned by
# connectAsUser or by setup code outside this view — confirm.
#
# @return [Object, nil] whatever is currently stored in @fs (nil if unset)
def get_fs
  @fs
end

.get_home_directory ⇒ Object

get home directory



186
187
188
# File 'lib/hdfs_jruby.rb', line 186

# Returns the home directory reported by the filesystem handle in @fs.
#
# @return [Object] the result of @fs.getHomeDirectory
def get_home_directory
  @fs.getHomeDirectory
end

.get_working_directory ⇒ Object

get working directory



191
192
193
# File 'lib/hdfs_jruby.rb', line 191

# Returns the working directory reported by the filesystem handle in @fs.
#
# @return [Object] the result of @fs.getWorkingDirectory
def get_working_directory
  @fs.getWorkingDirectory
end

.list(path, opts = {}) ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/hdfs_jruby.rb', line 106

# Lists the status entries for +path+, converting each one with _conv.
#
# When opts[:glob] is truthy the entries come from @fs.globStatus,
# otherwise from @fs.listStatus. A nil result from either call yields an
# empty Array.
#
# Without a block, returns an Array of converted entries. With a block,
# each converted entry is yielded and the return value is the result of
# iterating the raw status list with each.
#
# @param path [String] path or glob pattern; wrapped with _path (raises on nil)
# @param opts [Hash] options; only :glob is read
# @yield [Hash] converted entry, once per status
# @return [Array] converted entries, or [] when the status list is nil
def list(path, opts = {})
  globbing = opts[:glob] ? true : false
  target = _path(path)

  entries = globbing ? @fs.globStatus(target) : @fs.listStatus(target)
  return [] if entries.nil?

  return entries.map { |status| _conv(status) } unless block_given?

  entries.each { |status| yield _conv(status) }
end

.ls(path) ⇒ Array

Note:

file status: path length modificationTime owner group permission type

ls

Examples:

Hdfs.ls("hoge/").each do | stat |
  p stat
end

Parameters:

  • path (String)

Returns:

  • (Array)

    file status array



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/hdfs_jruby.rb', line 76

# Lists file-status Hashes for everything matched by the glob +path+.
#
# Each match from @fs.globStatus that is a directory (stat.isDir) is
# expanded one level via @fs.listStatus; a non-directory match is reported
# itself. A directory whose listing is nil is skipped. Every reported
# entry is converted with _conv.
#
# Without a block, returns the Array of converted entries. With a block,
# each converted entry is yielded and nil is returned. A nil glob result
# returns [] in both cases.
#
# @param path [String] path or glob pattern; wrapped with _path (raises on nil)
# @yield [Hash] converted entry
# @return [Array, nil] file status array, or nil when a block is given
def ls(path)
  matches = @fs.globStatus(_path(path))
  return [] if matches.nil?

  collected = []
  matches.each do |entry|
    # A directory contributes its children; a file contributes itself.
    children = entry.isDir ? @fs.listStatus(entry.getPath) : [entry]
    next if children.nil?

    children.each do |child|
      if block_given?
        yield _conv(child)
      else
        collected << _conv(child)
      end
    end
  end

  block_given? ? nil : collected
end

.mkdir(path) ⇒ Object

make directory

Parameters:

  • path (String)


167
168
169
# File 'lib/hdfs_jruby.rb', line 167

# Creates the directory +path+ through @fs.mkdirs.
#
# @param path [String] directory path; wrapped with _path (raises on nil)
# @return [Object] whatever @fs.mkdirs returns
def mkdir(path)
  target = _path(path)
  @fs.mkdirs(target)
end

.move(src, dst) ⇒ Object

Parameters:

  • src (String)

    hdfs source path

  • dst (String)

    hdfs destination path



138
139
140
# File 'lib/hdfs_jruby.rb', line 138

# Renames +src+ to +dst+ via @fs.rename.
#
# Both arguments are wrapped with Path.new directly (no nil check here).
#
# @param src [String] hdfs source path
# @param dst [String] hdfs destination path
# @return [Object] whatever @fs.rename returns
def move(src, dst)
  from = Path.new(src)
  to = Path.new(dst)
  @fs.rename(from, to)
end

.put(local, remote) ⇒ Object

put file or directory to hdfs

Parameters:

  • local (String)

    source (local path)

  • remote (String)

    destination (hdfs path)



174
175
176
# File 'lib/hdfs_jruby.rb', line 174

# Copies +local+ to +remote+ via @fs.copyFromLocalFile.
#
# Both arguments are wrapped with Path.new directly (no nil check here).
#
# @param local [String] source (local path)
# @param remote [String] destination (hdfs path)
# @return [Object] whatever @fs.copyFromLocalFile returns
def put(local, remote)
  source = Path.new(local)
  destination = Path.new(remote)
  @fs.copyFromLocalFile(source, destination)
end

.set_owner(path, owner, group) ⇒ Object

set owner & group

Parameters:

  • path (String)
  • owner (String)
  • group (String)


211
212
213
# File 'lib/hdfs_jruby.rb', line 211

# Sets the owner and group of +path+ via @fs.setOwner.
#
# @param path [String] target path; wrapped with _path (raises on nil)
# @param owner [String] owner forwarded unchanged
# @param group [String] group forwarded unchanged
# @return [Object] whatever @fs.setOwner returns
def set_owner(path, owner, group)
  target = _path(path)
  @fs.setOwner(target, owner, group)
end

.set_permission(path, perm) ⇒ Object

set permission

Parameters:

  • path (String)
  • perm (Integer)

    permission



203
204
205
# File 'lib/hdfs_jruby.rb', line 203

# Sets the permission of +path+ via @fs.setPermission.
#
# +perm+ is wrapped in a new org.apache.hadoop.fs.permission.FsPermission
# before being passed on. The path is wrapped first, so a nil path raises
# before any FsPermission is built.
#
# @param path [String] target path; wrapped with _path (raises on nil)
# @param perm [Integer] permission value handed to FsPermission.new
# @return [Object] whatever @fs.setPermission returns
def set_permission(path, perm)
  target = _path(path)
  mode = org.apache.hadoop.fs.permission.FsPermission.new(perm)
  @fs.setPermission(target, mode)
end

.set_working_directory(path) ⇒ Object

set working directory



196
197
198
# File 'lib/hdfs_jruby.rb', line 196

# Sets the working directory of the filesystem handle in @fs.
#
# The argument is wrapped with _path before being passed on. The previous
# implementation called _path with no argument, so every call failed with
# ArgumentError and +path+ was never used.
#
# @param path [String] new working directory; wrapped with _path
#   (raises RuntimeError "path is nil" on nil)
# @return [Object] whatever @fs.setWorkingDirectory returns
def set_working_directory(path)
  @fs.setWorkingDirectory(_path(path))
end

.size(path) ⇒ Integer

Returns file size.

Returns:

  • (Integer)

    file size



161
162
163
# File 'lib/hdfs_jruby.rb', line 161

# Returns the length reported by the file status of +path+.
#
# @param path [String] target path; wrapped with _path (raises on nil)
# @return [Integer] file size, i.e. getLen of @fs.getFileStatus
def size(path)
  status = @fs.getFileStatus(_path(path))
  status.getLen
end