Class: SparkToolkit::HDFS::FileSystem

Inherits:
Object
Defined in:
lib/spark_toolkit/hadoop/hdfs/file_system.rb

Instance Method Summary collapse

Constructor Details

#initialize(url, conf) ⇒ FileSystem

Returns a new instance of FileSystem.



9
10
11
12
13
14
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 9

# Builds a client handle to an HDFS cluster.
#
# @param url [String] the HDFS URI (e.g. "hdfs://namenode:8020")
# @param conf [org.apache.hadoop.conf.Configuration] Hadoop configuration
def initialize(url, conf)
  @url = url
  @hdfs_conf = conf
  # Configuration is handed to UserGroupInformation before the FileSystem is
  # created — NOTE(review): presumably so security/UGI settings take effect
  # first; confirm before reordering.
  UserGroupInformation.set_configuration(@hdfs_conf)
  @hdfs = org.apache.hadoop.fs.FileSystem.get(URI.create(url), @hdfs_conf)
end

Instance Method Details

#copy_to_local(hdfs_src, local_dst) ⇒ Object



40
41
42
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 40

# Copies a file from HDFS down to the local filesystem.
#
# The keyword defaults reproduce the previous hard-coded behavior
# (delSrc = false, useRawLocalFileSystem = true), so existing callers
# are unaffected.
#
# @param hdfs_src [String] source path on HDFS
# @param local_dst [String] destination path on the local filesystem
# @param delete_source [Boolean] remove the HDFS source after copying
# @param raw_local [Boolean] write via RawLocalFileSystem (skips local ".crc"
#   checksum files)
# @return [void]
def copy_to_local(hdfs_src, local_dst, delete_source: false, raw_local: true)
  @hdfs.copy_to_local_file(delete_source, Path.new(hdfs_src), Path.new(local_dst), raw_local)
end

#delete(path, recursively = false) ⇒ Object Also known as: rm



58
59
60
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 58

# Removes an entry from HDFS. Deleting a non-empty directory requires
# +recursively+ to be true. Aliased as #rm.
#
# @param path [String] path to delete
# @param recursively [Boolean] delete directory contents as well
def delete(path, recursively = false)
  target = Path.new(path)
  @hdfs.delete(target, recursively)
end

#exists?(path) ⇒ Boolean

Returns:

  • (Boolean)


54
55
56
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 54

# True when the given path exists on the cluster.
#
# @param path [String]
# @return [Boolean]
def exists?(path)
  hdfs_path = Path.new(path)
  @hdfs.exists(hdfs_path)
end

#get_file_status(entry) ⇒ Object Also known as: status



44
45
46
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 44

# Fetches the FileStatus record for an entry. Aliased as #status.
#
# @param entry [String] path to stat
# @return the status object from the underlying FileSystem#getFileStatus
def get_file_status(entry)
  target = Path.new(entry)
  @hdfs.get_file_status(target)
end

#list(path, recursively = false) ⇒ Object Also known as: ls



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 23

# Lists the entries under a path. Aliased as #ls.
#
# Non-recursive listing returns the immediate children; recursive listing
# walks the whole subtree via the remote iterator.
#
# @param path [String] directory to list
# @param recursively [Boolean] descend into subdirectories
# @return [Array] Path objects for the matched entries
def list(path, recursively = false)
  root = Path.new(path)

  unless recursively
    statuses = @hdfs.listStatus(root)
    return FileUtil.stat2Paths(statuses)
  end

  entries = []
  iterator = @hdfs.listFiles(root, true)
  entries << iterator.next.getPath while iterator.hasNext
  entries
end

#mkdir(path) ⇒ Object Also known as: mkdir_p



67
68
69
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 67

# Creates a directory via FileSystem#mkdirs (which also creates any missing
# parents, hence the #mkdir_p alias).
#
# @param path [String] directory to create
def mkdir(path)
  dir = Path.new(path)
  @hdfs.mkdirs(dir)
end

#open(path) ⇒ Object

Returns:

  • (HdfsInputStream)


19
20
21
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 19

# Opens an HDFS file for reading.
#
# @param path [String] file to open
# @return the input stream produced by the underlying FileSystem#open
def open(path)
  target = Path.new(path)
  @hdfs.open(target)
end

#put(local_src, hdfs_dst) ⇒ Object



63
64
65
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 63

# Uploads a local file to HDFS.
#
# The default +overwrite: true+ matches the previous hard-coded behavior,
# so existing callers are unaffected.
#
# @param local_src [String] source path on the local filesystem
# @param hdfs_dst [String] destination path on HDFS
# @param overwrite [Boolean] replace the destination if it already exists
# @return [void]
def put(local_src, hdfs_dst, overwrite: true)
  # snake_case Java call for consistency with #copy_to_local; the first
  # positional is delSrc = false (keep the local source file).
  @hdfs.copy_from_local_file(false, overwrite, Path.new(local_src), Path.new(hdfs_dst))
end

#rename(src, dst) ⇒ Object Also known as: mv



49
50
51
# File 'lib/spark_toolkit/hadoop/hdfs/file_system.rb', line 49

# Renames (moves) an entry within the cluster. Aliased as #mv.
#
# @param src [String] existing path
# @param dst [String] new path
def rename(src, dst)
  source = Path.new(src)
  destination = Path.new(dst)
  @hdfs.rename(source, destination)
end