Class: SparkToolkit::HDFS::FileSystem

Inherits:
Object
  • Object
show all
Defined in:
lib/spark_toolkit/hadoop/hdfs/file_system.rb

Instance Method Summary collapse

Constructor Details

#initialize(url, conf) ⇒ FileSystem

Returns a new instance of FileSystem.



9
10
11
12
13
14
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 9

# Connects to the HDFS cluster addressed by +url+ using the supplied
# Hadoop configuration. The configuration is registered with
# UserGroupInformation before the filesystem handle is obtained, since
# authentication must be set up first.
def initialize(url, conf)
  @url = url
  @hdfs_conf = conf
  UserGroupInformation.set_configuration(@hdfs_conf)
  uri = URI.create(@url)
  @hdfs = org.apache.hadoop.fs.FileSystem.get(uri, @hdfs_conf)
end

Instance Method Details

#copy_to_local(hdfs_src, local_dst) ⇒ Object



40
41
42
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 40

# Copies a file from HDFS down to the local filesystem.
#
# @param hdfs_src [String] source path on HDFS
# @param local_dst [String] destination path on the local filesystem
# @param del_src [Boolean] delete the HDFS source after copying
#   (default: false, matching previous behavior)
# @param use_raw_local [Boolean] write through the raw local filesystem,
#   skipping local .crc checksum files (default: true, matching previous
#   behavior)
def copy_to_local(hdfs_src, local_dst, del_src: false, use_raw_local: true)
  @hdfs.copy_to_local_file(del_src, Path.new(hdfs_src), Path.new(local_dst), use_raw_local)
end

#delete(path, recursively = false) ⇒ Object Also known as: rm



48
49
50
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 48

# Removes +path+ from HDFS. When +recursively+ is true, a directory is
# deleted together with everything beneath it.
def delete(path, recursively=false)
  target = Path.new(path)
  @hdfs.delete(target, recursively)
end

#exists?(path) ⇒ Boolean

Returns:

  • (Boolean)


44
45
46
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 44

# @return [Boolean] whether +path+ exists on HDFS
def exists?(path)
  target = Path.new(path)
  @hdfs.exists(target)
end

#list(path, recursively = false) ⇒ Object Also known as: ls



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 23

# Lists the entries under +path+.
#
# Java methods are called through JRuby's snake_case aliases for
# consistency with the rest of this class (e.g. copy_to_local_file).
#
# @param path [String] HDFS path to list
# @param recursively [Boolean] when true, walks the whole tree and
#   returns every file beneath +path+; when false, returns only the
#   immediate children
# @return [Array<org.apache.hadoop.fs.Path>] the listed paths
def list(path, recursively=false)
  if recursively
    # list_files returns a Hadoop RemoteIterator, which is not
    # Enumerable, so it has to be drained manually.
    dir_itr = @hdfs.list_files(Path.new(path), true)
    paths = []
    paths << dir_itr.next.get_path while dir_itr.has_next
    paths
  else
    file_status = @hdfs.list_status(Path.new(path))
    FileUtil.stat2_paths(file_status)
  end
end

#mkdir(path) ⇒ Object Also known as: mkdir_p



57
58
59
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 57

# Creates +path+ on HDFS, including missing parent directories
# (delegates to Hadoop's mkdirs — hence the mkdir_p alias).
def mkdir(path)
  target = Path.new(path)
  @hdfs.mkdirs(target)
end

#open(path) ⇒ Object

Returns:

  • (HdfsInputStream) — an open input stream for the file at the given path



19
20
21
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 19

# Opens the HDFS file at +path+ for reading.
# @return the Hadoop input stream for +path+
def open(path)
  target = Path.new(path)
  @hdfs.open(target)
end

#put(local_src, hdfs_dst) ⇒ Object



53
54
55
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 53

# Uploads a local file to HDFS.
#
# The Java call is made through JRuby's snake_case alias
# (copy_from_local_file) for consistency with copy_to_local_file used
# elsewhere in this class.
#
# @param local_src [String] source path on the local filesystem
# @param hdfs_dst [String] destination path on HDFS
# @param del_src [Boolean] delete the local source after copying
#   (default: false, matching previous behavior)
# @param overwrite [Boolean] overwrite an existing destination
#   (default: true, matching previous behavior)
def put(local_src, hdfs_dst, del_src: false, overwrite: true)
  @hdfs.copy_from_local_file(del_src, overwrite, Path.new(local_src), Path.new(hdfs_dst))
end