背景
將 Spark 的 checkpoint 目錄設置爲阿里雲 HDFS 的地址後,調用 DataFrame 的 checkpoint() 時報錯。checkpoint 目錄的設置方式如下:
spark.sparkContext.setCheckpointDir('dfs://f***iyuncs.com:10290/test')
集羣環境
正式環境
1593273600
2020-06-28 14:45:05.159335
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<ipython-input-19-d5dc196d0abf> in <module>
357 # print(spark.conf.get('spark.driver.maxResultSize'))
358 # print(spark.conf.get('spark.default.parallelism'))
--> 359 t.handle_data(spark, n=1)
360 spark.stop()
<ipython-input-19-d5dc196d0abf> in wrap_f(*args, **kwargs)
32 def wrap_f(*args, **kwargs):
33 start_time = time.time()
---> 34 resutl = f(*args, **kwargs)
35 end_time = time.time()
36 print(f"""{f.__name__} 程序執行時間 爲 {end_time - start_time} """)
<ipython-input-19-d5dc196d0abf> in handle_data(self, spark, n)
312 exerdf.persist(storageLevel=StorageLevel.MEMORY_AND_DISK)
313
--> 314 exerdf.checkpoint()
315 treedf.checkpoint()
316 # print(treedf.count())
/opt/python3.6.7/lib/python3.6/site-packages/pyspark/sql/dataframe.py in checkpoint(self, eager)
433 .. note:: Experimental
434 """
--> 435 jdf = self._jdf.checkpoint(eager)
436 return DataFrame(jdf, self.sql_ctx)
437
/opt/python3.6.7/lib/python3.6/site-packages/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
/opt/python3.6.7/lib/python3.6/site-packages/pyspark/sql/utils.py in deco(*a, **kw)
61 def deco(*a, **kw):
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
65 s = e.java_exception.toString()
/opt/python3.6.7/lib/python3.6/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
Py4JJavaError: An error occurred while calling o1812.checkpoint.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1.0 (TID 4, 172.20.4.28, executor 0): java.io.EOFException: End of File Exception between local host is: "workers-k6dnm/172.20.4.28"; destination host is: "f-**fs.aliyuncs.com":10290; : java.io.EOFException; For more details see: http://wiki.apache.org/hadoop/EOFException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:824)
at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:788)
at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1495)