Error when trying to push a csv file to a Kafka topic using confluent_kafka (Python)

Time: 2019-05-03 17:37:33

Tags: python-3.x kafka-producer-api confluent confluent-schema-registry confluent-kafka

I am trying to push a csv file to a Kafka topic using AvroProducer. I read the csv file and loop over it row by row, producing each row to the Kafka topic. The csv file has 10 columns, which match the fields defined in the value_schema_str variable. However, I am getting an error. I am using Python 3.7, avro-python3 1.8.2, and confluent_kafka.

I get this error when running Producer.py:

Traceback (most recent call last):
  File "/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 600, in urlopen
    chunked=chunked)
  File "/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 384, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 380, in _make_request
    httplib_response = conn.getresponse()
  File "/anaconda3/lib/python3.7/http/client.py", line 1321, in getresponse
    response.begin()
  File "/anaconda3/lib/python3.7/http/client.py", line 296, in begin
    version, status, reason = self._read_status()
  File "/anaconda3/lib/python3.7/http/client.py", line 278, in _read_status
    raise BadStatusLine(line)
http.client.BadStatusLine: P

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/anaconda3/lib/python3.7/site-packages/requests/adapters.py", line 449, in send
    timeout=timeout
  File "/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 638, in urlopen
    _stacktrace=sys.exc_info()[2])
  File "/anaconda3/lib/python3.7/site-packages/urllib3/util/retry.py", line 367, in increment
    raise six.reraise(type(error), error, _stacktrace)
  File "/anaconda3/lib/python3.7/site-packages/urllib3/packages/six.py", line 685, in reraise
    raise value.with_traceback(tb)
  File "/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 600, in urlopen
    chunked=chunked)
  File "/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 384, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 380, in _make_request
    httplib_response = conn.getresponse()
  File "/anaconda3/lib/python3.7/http/client.py", line 1321, in getresponse
    response.begin()
  File "/anaconda3/lib/python3.7/http/client.py", line 296, in begin
    version, status, reason = self._read_status()
  File "/anaconda3/lib/python3.7/http/client.py", line 278, in _read_status
    raise BadStatusLine(line)
urllib3.exceptions.ProtocolError: ('Connection aborted.', BadStatusLine('\x15\x03\x03\x00\x02\x02P'))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "producer.py", line 97, in <module>
    avroProducer.produce(topic='UserActivity', value=row)
  File "/anaconda3/lib/python3.7/site-packages/confluent_kafka/avro/__init__.py", line 80, in produce
    value = self._serializer.encode_record_with_schema(topic, value_schema, value)
  File "/anaconda3/lib/python3.7/site-packages/confluent_kafka/avro/serializer/message_serializer.py", line 105, in encode_record_with_schema
    schema_id = self.registry_client.register(subject, schema)
  File "/anaconda3/lib/python3.7/site-packages/confluent_kafka/avro/cached_schema_registry_client.py", line 215, in register
    result, code = self._send_request(url, method='POST', body=body)
  File "/anaconda3/lib/python3.7/site-packages/confluent_kafka/avro/cached_schema_registry_client.py", line 164, in _send_request
    response = self._session.request(method, url, headers=_headers, json=body)
  File "/anaconda3/lib/python3.7/site-packages/requests/sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "/anaconda3/lib/python3.7/site-packages/requests/sessions.py", line 646, in send
    r = adapter.send(request, **kwargs)
  File "/anaconda3/lib/python3.7/site-packages/requests/adapters.py", line 498, in send
    raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', BadStatusLine('\x15\x03\x03\x00\x02\x02P'))
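
For what it's worth, the bytes in the BadStatusLine payload (\x15\x03\x03\x00\x02\x02P) decode as a raw TLS alert record (content type 0x15 = alert, version 0x0303 = TLS 1.2, length 2, level 0x02 = fatal, description 0x50 = 80 = internal_error). In other words, the schema registry's TLS listener rejected what the HTTP client sent, which points at a TLS mismatch between the registry client and the server rather than an Avro problem. One thing worth checking: the config in Producer.py below only supplies certificates for the broker connection, and in confluent-kafka-python around 1.0 the schema registry client reads its own TLS settings from schema.registry.*-prefixed keys. A minimal sketch under that assumption, reusing the same (redacted) hosts and certificate paths from the question:

from confluent_kafka.avro import AvroProducer

# Sketch only: wire TLS settings into the schema registry HTTP client as
# well as the broker connection. AvroProducer strips the schema.registry.
# prefix and passes these keys to CachedSchemaRegistryClient
# (confluent-kafka-python ~1.0; older versions differ).
conf = {
    'bootstrap.servers': 'confluent-cp-kafka-external.confluent-----',
    'security.protocol': 'SSL',
    'ssl.ca.location': 'rootca',
    'ssl.certificate.location': 'server_qa.crt',
    'ssl.key.location': 'server_qa.key',
    'schema.registry.url': 'https://confluent-cp-schema-registry-----',
    'schema.registry.ssl.ca.location': 'rootca',
    'schema.registry.ssl.certificate.location': 'server_qa.crt',
    'schema.registry.ssl.key.location': 'server_qa.key',
}
# producer = AvroProducer(conf, default_value_schema=value_schema)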

Producer.py

from avro import schema
from confluent_kafka.avro import AvroProducer
import csv


AvroProducerConf = {
    'bootstrap.servers': 'confluent-cp-kafka-external.confluent-----',
    'schema.registry.url': 'https://confluent-cp-schema-registry-----',
    'security.protocol': 'SSL',
    'ssl.ca.location': 'rootca',
    'ssl.certificate.location': 'server_qa.crt',
    'ssl.key.location': 'server_qa.key',
    # 'ssl.key.password': '123456',
    'debug': 'msg,topic,broker',
    # 'group.id': 'testing_group',
}



value_schema_str = """
{
   "namespace": "my.test",
   "name": "value",
   "type": "record",
   "fields" : [
     {"name" : "id", "type" : "long"},
     {"name" : "activity_id", "type" : "string"},
     {"name" : "user_activity_id", "type" : "long"},
     {"name" : "activity_name", "type" : "string"},
     {"name" : "num_points", "type" : "long"},
     {"name" : "notes", "type" : "string"},
     {"name" : "third_party_id", "type" : "string"},
     {"name" : "email_address", "type" : "string"},
     {"name" : "mobile_phone_number", "type" : "string"},
     {"name" : "dateCreated", "type" : "string"}
   ]
}
"""


value_schema = schema.Parse(value_schema_str)


avroProducer = AvroProducer(AvroProducerConf, default_value_schema=value_schema)

with open('/Users/Downloads/documents_20190417/2_121_USER_ACTIVITY_20190301.csv') as file:
    reader = csv.DictReader(file, delimiter=",")
    for row in reader:
        avroProducer.produce(topic='UserActivity', value=row)
    # flush once after the loop; flushing after every message defeats batching
    avroProducer.flush()
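
Separately, csv.DictReader yields every value as a str, while the schema above declares id, user_activity_id and num_points as long, so even once the registry connection works the Avro serializer can be expected to reject these rows. A minimal, hypothetical coercion step before producing:

# Hypothetical helper: cast the string values from DictReader to the
# types declared in value_schema_str before handing the row to Avro.
LONG_FIELDS = ('id', 'user_activity_id', 'num_points')

def coerce_row(row):
    for field in LONG_FIELDS:
        row[field] = int(row[field])
    return row

# inside the loop:
#     avroProducer.produce(topic='UserActivity', value=coerce_row(row))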

0 Answers