pyspark: FileNotFoundError: [WinError 2] The system cannot find the file specified

Date: 2018-10-30 10:59:51

Tags: apache-spark pyspark

The following program produces the error below:

from pyspark import SparkContext

sc = SparkContext("local", "Local app")
words = sc.parallelize(
    ["scala", "java", "hadoop", "spark", "akka",
     "spark vs hadoop", "pyspark", "pyspark and spark"]
)
words_filter = words.filter(lambda x: 'spark' in x)
filtered = words_filter.take(4)
print(filtered)

FileNotFoundError                         Traceback (most recent call last)
<ipython-input-15-6c02343320b8> in <module>()
      1 from pyspark import SparkContext
      2 #sc = SparkSession.builder.master("local").appName("Word Count").config("spark.some.config.option", "some-value").getOrCreate()
----> 3 sc = SparkContext("local", "")
      4 
      5 words = sc.parallelize (

C:\opt\spark\spark-2.3.0-bin-hadoop2.7\spark-2.3.0-bin-hadoop2.7\python\pyspark\context.py in __init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
    113         """
    114         self._callsite = first_spark_call() or CallSite(None, None, None)
--> 115         SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
    116         try:
    117             self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,

C:\opt\spark\spark-2.3.0-bin-hadoop2.7\spark-2.3.0-bin-hadoop2.7\python\pyspark\context.py in _ensure_initialized(cls, instance, gateway, conf)
    278         with SparkContext._lock:
    279             if not SparkContext._gateway:
--> 280                 SparkContext._gateway = gateway or launch_gateway(conf)
    281                 SparkContext._jvm = SparkContext._gateway.jvm
    282 

C:\opt\spark\spark-2.3.0-bin-hadoop2.7\spark-2.3.0-bin-hadoop2.7\python\pyspark\java_gateway.py in launch_gateway(conf)
     78         else:
     79             # preexec_fn not supported on Windows
---> 80             proc = Popen(command, stdin=PIPE, env=env)
     81 
     82         gateway_port = None

C:\ProgramData\Anaconda3\lib\subprocess.py in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors)
    707                                 c2pread, c2pwrite,
    708                                 errread, errwrite,
--> 709                                 restore_signals, start_new_session)
    710         except:
    711             # Cleanup if the child failed starting.

C:\ProgramData\Anaconda3\lib\subprocess.py in _execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_start_new_session)
    995                                          env,
    996                                          os.fspath(cwd) if cwd is not None else None,
--> 997                                          startupinfo)
    998             finally:
    999                 # Child is launched. Close the parent's copy of those pipe

FileNotFoundError: [WinError 2] The system cannot find the file specified

2 Answers:

Answer 0 (score: 0)

Not sure about this one, since I can't reproduce your error, but looking at java_gateway.py it may help to check the SPARK_HOME environment variable, and whether the spark-submit script can be found under $SPARK_HOME.

In Python:

import os
print(os.environ.get("SPARK_HOME"))
print(os.path.join(os.environ.get("SPARK_HOME"), "bin", "spark-submit.cmd"))
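
Going a step further, here is a minimal sketch (assuming the Spark layout shown in the traceback, where the Windows launcher is bin\spark-submit.cmd) that reports whether the launcher actually exists; pyspark's launch_gateway fails with WinError 2 precisely when Popen cannot find that script:

import os

# SPARK_HOME must point at the Spark installation root, e.g. the
# spark-2.3.0-bin-hadoop2.7 directory visible in the traceback above
spark_home = os.environ.get("SPARK_HOME")
if spark_home is None:
    print("SPARK_HOME is not set")
else:
    submit = os.path.join(spark_home, "bin", "spark-submit.cmd")
    # [WinError 2] is raised when this path does not exist
    print(submit, "exists:", os.path.exists(submit))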

Answer 1 (score: 0)

I don't think your session is being created correctly; try the following:

from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .master('local[*]') \
    .appName('your app name') \
    .getOrCreate()
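
With a session in hand, the original example can run through the underlying context (a hedged sketch; spark.sparkContext exposes the SparkContext that backs the session):

words = spark.sparkContext.parallelize(
    ["scala", "java", "hadoop", "spark", "akka",
     "spark vs hadoop", "pyspark", "pyspark and spark"]
)
print(words.filter(lambda x: 'spark' in x).take(4))

Note that this only changes how the context is constructed; if spark-submit.cmd cannot be found (see Answer 0), getOrCreate() will hit the same WinError 2.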