pyspark.SparkFiles.get#
- classmethod SparkFiles.get(filename)[source]#
- Get the absolute path of a file added through SparkContext.addFile() or SparkContext.addPyFile().
- New in version 0.7.0.
- Parameters
- filename : str
- name of the file that was added to resources
 
- Returns
- str
- the absolute path of the file 
 
 - See also
   SparkFiles.getRootDirectory()
   SparkContext.addFile()
   SparkContext.addPyFile()
   SparkContext.listFiles

 - Examples

>>> import os
>>> import tempfile
>>> from pyspark import SparkFiles

>>> with tempfile.TemporaryDirectory(prefix="get") as d:
...     path1 = os.path.join(d, "test.txt")
...     with open(path1, "w") as f:
...         _ = f.write("100")
...
...     sc.addFile(path1)
...     file_list1 = sorted(sc.listFiles)
...
...     def func1(iterator):
...         path = SparkFiles.get("test.txt")
...         assert path.startswith(SparkFiles.getRootDirectory())
...         return [path]
...
...     path_list1 = sc.parallelize([1, 2, 3, 4]).mapPartitions(func1).collect()
...
...     path2 = os.path.join(d, "test.py")
...     with open(path2, "w") as f:
...         _ = f.write("import pyspark")
...
...     # py files
...     sc.addPyFile(path2)
...     file_list2 = sorted(sc.listFiles)
...
...     def func2(iterator):
...         path = SparkFiles.get("test.py")
...         assert path.startswith(SparkFiles.getRootDirectory())
...         return [path]
...
...     path_list2 = sc.parallelize([1, 2, 3, 4]).mapPartitions(func2).collect()
>>> file_list1
['file:/.../test.txt']
>>> set(path_list1)
{'.../test.txt'}
>>> file_list2
['file:/.../test.py', 'file:/.../test.txt']
>>> set(path_list2)
{'.../test.py'}