尝试将spark DataFrame写为PostgreSQL表时出现以下错误:
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<ipython-input-121-159b38b4c333> in <module>()
7 "password":"112211234",
8 "driver":"org.postgresql.Driver",
----> 9 "client_encoding":"utf8"
10 }
11 )
/home/ec2-user/spark-2.0.1-bin-hadoop2.6/python/pyspark/sql/readwriter.pyc in jdbc(self, url, table, mode, properties)
760 for k in properties:
761 jprop.setProperty(k, properties[k])
--> 762 self._jwrite.mode(mode).jdbc(url, table, jprop)
763
764
/home/ec2-user/spark-2.0.1-bin-hadoop2.6/python/lib/py4j-0.10.3-src.zip/py4j/java_gateway.py in __call__(self, *args)
1131 answer = self.gateway_client.send_command(command)
1132 return_value = get_return_value(
-> 1133 answer, self.gateway_client, self.target_id, self.name)
1134
1135 for temp_arg in temp_args:
/home/ec2-user/spark-2.0.1-bin-hadoop2.6/python/pyspark/sql/utils.pyc in deco(*a, **kw)
61 def deco(*a, **kw):
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
65 s = e.java_exception.toString()
/home/ec2-user/spark-2.0.1-bin-hadoop2.6/python/lib/py4j-0.10.3-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
317 raise Py4JJavaError(
318 "An error occurred while calling {0}{1}{2}.\n".
--> 319 format(target_id, ".", name), value)
320 else:
321 raise Py4JError(
<type 'str'>: (<type 'exceptions.UnicodeEncodeError'>, UnicodeEncodeError('ascii', u'An error occurred while calling o3418.jdbc.\n: org.apache.spark.SparkException: Job aborted due to stage failure: Task 5 in stage 105.0 failed 4 times, most recent failure: Lost task 5.3 in stage 105.0 (TID 1937, 10.0.0.52): org.postgresql.util.PSQLException: \u041f\u043e\u0434\u0441\u043e\u0435\u0434\u0438\u043d\u0435\u043d\u0438\u0435 \u043f\u043e \u0430\u0434\u0440\u0435\u0441\u0443 localhost:5432 \u043e\u0442\u043a\u043b\u043e\u043d\u0435\u043d\u043e. \u041f\u0440\u043e\u0432\u0435\u0440\u044c\u0442\u0435 \u0447\u0442\u043e \u0445\u043e\u0441\u0442 \u0438 \u043f\u043e\u0440\u0442 \u0443\u043a\u0430\u0437\u0430\u043d\u044b \u043f\u0440\u0430\u0432\u0438\u043b\u044c\u043d\u043e \u0438 \u0447\u0442\u043e postmaster \u043f\u0440\u0438\u043d\u0438\u043c\u0430\u0435\u0442 TCP/IP-\u043f\u043e\u0434\u0441\u043e\u0435\u0434\u0438\u043d\u0435\u043d\u0438\u044f.\n\tat org.postgresql.core.v3.ConnectionFactoryImpl.openConnectionImpl(ConnectionFactoryImpl.java:262)\n\tat org.postgresql.core.ConnectionFactory.openConnection(ConnectionFactory.java:52)\n\tat org.postgresql.jdbc.PgConnection.<init>(PgConnection.java:216)\n\tat org.postgresql.Driver.makeConnection(Driver.java:404)\n\tat org.postgresql.Driver.connect(Driver.java:272)\n\tat org.apache.spark.sql.execution.datasources.jdbc.DriverWrapper.connect(DriverWrapper.scala:45)
DataFrame如下:
from pyspark.sql import SQLContext, Row, DataFrame, SparkSession
from pyspark.sql.types import *
spark = SparkSession.builder.appName("test") \
.config("spark.some.config.option", "test") \
.getOrCreate()
fields = [
StructField("id", IntegerType(), True),
StructField("name", StringType(), True),
StructField("age", IntegerType(), True)
]
schema = StructType(fields)
test = spark.createDataFrame([
Row(id=1, name=u"a", age=34),
Row(id=2, name=u"b", age=25)
], schema)
test.show()
即。这一个
+---+----+---+
| id|name|age|
+---+----+---+
| 1| a| 34|
| 2| b| 25|
+---+----+---+
要将它写入PostgreSQL,我使用代码:
test.write.jdbc(
url="jdbc:postgresql://localhost:5432/db",
table="test",
mode="overwrite",
properties={
"user":"root",
"password":"12345",
"driver":"org.postgresql.Driver",
"client_encoding":"utf8"
}
)
但它会产生上面显示的错误。找不到这个例外的原因。
使用postres控制台创建的现有表的读取工作正常。
我将不胜感激。