我正在尝试使用docker为使用AWS Redshift集群的数据管道项目使用docker启动气流UI。我打算在Airflow中使用LocalExecutor。我在airflow.cfg文件中使用AWS redshift集群信息指定了sql_alchemy_conn
变量。连接字符串格式:sql_alchemy_conn = postgresql+psycopg2://user:password@clusterinfo.region.redshift.amazonaws.com:5439/db
成功运行docker build -t my-airflow .
后,docker-compose up
输出以下错误:
webserver_1 | Traceback (most recent call last):
webserver_1 | File "/usr/local/bin/airflow", line 37, in <module>
webserver_1 | args.func(args)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/utils/cli.py", line 75, in wrapper
webserver_1 | return f(*args, **kwargs)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/bin/cli.py", line 1040, in scheduler
webserver_1 | job.run()
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/jobs/base_job.py", line 215, in run
webserver_1 | session.commit()
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 1042, in commit
webserver_1 | self.transaction.commit()
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 504, in commit
webserver_1 | self._prepare_impl()
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 483, in _prepare_impl
webserver_1 | self.session.flush()
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2523, in flush
webserver_1 | self._flush(objects)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2664, in _flush
webserver_1 | transaction.rollback(_capture_exception=True)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/util/langhelpers.py", line 68, in __exit__
webserver_1 | compat.raise_(
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 178, in raise_
webserver_1 | raise exception
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2624, in _flush
webserver_1 | flush_context.execute()
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/unitofwork.py", line 422, in execute
webserver_1 | rec.execute(self)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/unitofwork.py", line 586, in execute
webserver_1 | persistence.save_obj(
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/persistence.py", line 239, in save_obj
webserver_1 | _emit_insert_statements(
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/persistence.py", line 1135, in _emit_insert_statements
webserver_1 | result = cached_connections[connection].execute(
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1020, in execute
webserver_1 | return meth(self, multiparams, params)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/sql/elements.py", line 298, in _execute_on_connection
webserver_1 | return connection._execute_clauseelement(self, multiparams, params)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1133, in _execute_clauseelement
webserver_1 | ret = self._execute_context(
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1213, in _execute_context
webserver_1 | self._handle_dbapi_exception(
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1517, in _handle_dbapi_exception
webserver_1 | util.raise_(
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 178, in raise_
webserver_1 | raise exception
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1211, in _execute_context
webserver_1 | context = constructor(dialect, self, conn, *args)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 816, in _init_compiled
webserver_1 | self._process_executesingle_defaults()
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 1575, in _process_executesingle_defaults
webserver_1 | val = self.get_insert_default(c)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/dialects/postgresql/base.py", line 2433, in get_insert_default
webserver_1 | return self._execute_scalar(exc, column.type)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 1103, in _execute_scalar
webserver_1 | conn._cursor_execute(self.cursor, stmt, default_params, context=self)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1360, in _cursor_execute
webserver_1 | self._handle_dbapi_exception(
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1517, in _handle_dbapi_exception
webserver_1 | util.raise_(
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 178, in raise_
webserver_1 | raise exception
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1358, in _cursor_execute
webserver_1 | self.dialect.do_execute(cursor, statement, parameters, context)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 590, in do_execute
webserver_1 | cursor.execute(statement, parameters)
webserver_1 | sqlalchemy.exc.StatementError: (sqlalchemy.exc.ProgrammingError) (psycopg2.errors.UndefinedTable) relation "job_id_seq" does not exist
webserver_1 |
webserver_1 | [SQL: INSERT INTO job (id, dag_id, state, job_type, start_date, end_date, latest_heartbeat, executor_class, hostname, unixname) VALUES (%(id)s, %(dag_id)s, %(state)s, %(job_type)s, %(start_date)s, %(end_date)s, %(latest_heartbeat)s, %(executor_class)s, %(hostname)s, %(unixname)s)]
webserver_1 | [parameters: [{'job_type': 'SchedulerJob', 'unixname': 'root', 'executor_class': 'NoneType', 'hostname': 'fb44572de502', 'start_date': datetime.datetime(2020, 5, 1 ... (48 characters truncated) ... 'state': 'running', 'latest_heartbeat': datetime.datetime(2020, 5, 15, 4, 40, 59, 953160, tzinfo=<Timezone [UTC]>), 'dag_id': None, 'end_date': None}]]
webserver_1 | (Background on this error at: http://sqlalche.me/e/f405)
webserver_1 | Traceback (most recent call last):
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1283, in _execute_context
webserver_1 | self.dialect.do_execute(
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 590, in do_execute
webserver_1 | cursor.execute(statement, parameters)
webserver_1 | psycopg2.errors.UndefinedTable: relation "connection" does not exist
webserver_1 |
webserver_1 |
webserver_1 | The above exception was the direct cause of the following exception:
webserver_1 |
webserver_1 | Traceback (most recent call last):
webserver_1 | File "/usr/local/bin/airflow", line 37, in <module>
webserver_1 | args.func(args)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/utils/cli.py", line 75, in wrapper
webserver_1 | return f(*args, **kwargs)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/bin/cli.py", line 900, in webserver
webserver_1 | app = cached_app_rbac(None) if settings.RBAC else cached_app(None)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/www/app.py", line 233, in cached_app
webserver_1 | app = create_app(config, testing)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/www/app.py", line 88, in create_app
webserver_1 | from airflow.www import views
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/www/views.py", line 2443, in <module>
webserver_1 | class ChartModelView(wwwutils.DataProfilingMixin, AirflowModelView):
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/www/views.py", line 2534, in ChartModelView
webserver_1 | 'conn_id': _connection_ids()
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/utils/db.py", line 74, in wrapper
webserver_1 | return func(*args, **kwargs)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/www/views.py", line 2437, in _connection_ids
webserver_1 | return [(c.conn_id, c.conn_id) for c in (
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 3481, in __iter__
webserver_1 | return self._execute_and_instances(context)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 3506, in _execute_and_instances
webserver_1 | result = conn.execute(querycontext.statement, self._params)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1020, in execute
webserver_1 | return meth(self, multiparams, params)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/sql/elements.py", line 298, in _execute_on_connection
webserver_1 | return connection._execute_clauseelement(self, multiparams, params)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1133, in _execute_clauseelement
webserver_1 | ret = self._execute_context(
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1323, in _execute_context
webserver_1 | self._handle_dbapi_exception(
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1517, in _handle_dbapi_exception
webserver_1 | util.raise_(
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 178, in raise_
webserver_1 | raise exception
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1283, in _execute_context
webserver_1 | self.dialect.do_execute(
webserver_1 | File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 590, in do_execute
webserver_1 | cursor.execute(statement, parameters)
webserver_1 | sqlalchemy.exc.ProgrammingError: (psycopg2.errors.UndefinedTable) relation "connection" does not exist
webserver_1 |
webserver_1 | [SQL: SELECT connection.conn_id AS connection_conn_id
webserver_1 | FROM connection GROUP BY connection.conn_id]
webserver_1 | (Background on this error at: http://sqlalche.me/e/f405)
Dockerfile:
FROM python:3
WORKDIR /usr/local/airflow/
ENV AIRFLOW_HOME=/usr/local/airflow
COPY requirements.txt ./
RUN pip install --upgrade pip && \
pip install --no-cache-dir -r requirements.txt
COPY script/entrypoint.sh /entrypoint.sh
COPY config/airflow.cfg $AIRFLOW_HOME/airflow.cfg
ENTRYPOINT ["/entrypoint.sh"]
CMD ["webserver"]
docker-compose.yml:
version: '3.7'
services:
postgres:
image: postgres:9.6
environment:
- POSTGRES_USER=x
- POSTGRES_PASSWORD=x
- POSTGRES_DB=x
logging:
options:
max-size: 10m
max-file: "3"
ports:
- "5439"
webserver:
image: my-airflow
build:
context: .
args:
AIRFLOW_DEPS: "postgres, aws"
restart: always
depends_on:
- postgres
environment:
- LOAD_EX=n
- EXECUTOR=Local
logging:
options:
max-size: 10m
max-file: "3"
volumes:
- ./dags:/usr/local/airflow/dags
- ./plugins:/usr/local/airflow/plugins
- ./requirements.txt:/requirements.txt
ports:
- "8080"
command: webserver
# healthcheck:
# test: ["CMD-SHELL", "[ -f /usr/local/airflow/airflow-webserver.pid ]"]
# interval: 30s
# timeout: 30s
# retries: 3
entrypoint.sh:
#!/usr/bin/env bash
airflow initdb
airflow scheduler &
exec airflow webserver
requirements.txt:
apache-airflow==1.10.9
cryptography==2.9.2
docutils==0.15.2
boto3==1.12.41
notebook==6.0.3
numpy==1.18.1
pandas==0.25.3
psycopg2-binary==2.8.5
typing-extensions==3.7.4.2