I am trying to get Airflow running with Docker for a data-pipeline project that uses AWS Redshift and S3. docker build -t my-airflow completes successfully, but docker-compose up then fails with the error below. It looks like the transitive dependencies of the packages in requirements.txt are not installed during the Docker build: in this case Airflow itself, which is listed in requirements.txt, gets installed, but its dependency typing_extensions does not.
Error output when running docker-compose up:
webserver_1 | Traceback (most recent call last):
webserver_1 | File "/usr/local/bin/airflow", line 26, in <module>
webserver_1 | from airflow.bin.cli import CLIFactory
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/bin/cli.py", line 79, in <module>
webserver_1 | api_module = import_module(conf.get('cli', 'api_client')) # type: Any
webserver_1 | File "/usr/local/lib/python3.8/importlib/__init__.py", line 127, in import_module
webserver_1 | return _bootstrap._gcd_import(name[level:], package, level)
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/api/client/local_client.py", line 24, in <module>
webserver_1 | from airflow.api.common.experimental import delete_dag
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/api/common/experimental/delete_dag.py", line 25, in <module>
webserver_1 | from airflow.models.serialized_dag import SerializedDagModel
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/models/serialized_dag.py", line 32, in <module>
webserver_1 | from airflow.serialization.serialized_objects import SerializedDAG
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/serialization/serialized_objects.py", line 36, in <module>
webserver_1 | from airflow.serialization.json_schema import Validator, load_dag_schema
webserver_1 | File "/usr/local/lib/python3.8/site-packages/airflow/serialization/json_schema.py", line 26, in <module>
webserver_1 | from typing_extensions import Protocol
webserver_1 | ModuleNotFoundError: No module named 'typing_extensions'
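To rule out a runtime-only issue, one way to check whether the package ever made it into the built image (using the my-airflow tag from the build command above; pip show and pip freeze are standard pip commands) would be:

docker run --rm --entrypoint pip my-airflow show typing_extensions
docker run --rm --entrypoint pip my-airflow freeze | grep -i typing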
Dockerfile:
FROM python:3
WORKDIR /usr/local/airflow/
ENV AIRFLOW_HOME=/usr/local/airflow
COPY requirements.txt ./
RUN pip install --upgrade pip && \
pip install --no-cache-dir -r requirements.txt
COPY script/entrypoint.sh /entrypoint.sh
COPY config/airflow.cfg $AIRFLOW_HOME/airflow.cfg
ENTRYPOINT ["/entrypoint.sh"]
CMD ["webserver"]
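A workaround I am considering, but have not verified, is to pin the missing transitive dependency explicitly in the RUN step, or to install Airflow against its published constraints file. The 1.10.12 version and the Python 3.8 constraints URL below are assumptions based on the standard pattern from the Airflow install docs, since I am not sure which Airflow version my requirements.txt resolves to:

# explicitly install the missing transitive dependency
pip install --no-cache-dir typing_extensions
# or pin everything via the constraints file (assuming Airflow 1.10.12 and that a
# constraints-3.8.txt is published for that version)
pip install --no-cache-dir "apache-airflow==1.10.12" \
    --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-1.10.12/constraints-3.8.txt"

Even if that works, I would still like to understand why the transitive dependency is not installed in the first place.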
docker-compose.yml:
version: '3.7'
services:
  postgres:
    image: postgres:9.6
    environment:
      - POSTGRES_USER=learning-projects
      - POSTGRES_PASSWORD=Oldmans2ricebowls1tongs4graveyards3
      - POSTGRES_DB=dwh
    logging:
      options:
        max-size: 10m
        max-file: "3"
    ports:
      - "5439"
  webserver:
    image: my-airflow
    # build:
    #   context: .
    #   args:
    #     AIRFLOW_DEPS: "postgres, aws"
    restart: always
    depends_on:
      - postgres
    environment:
      - LOAD_EX=n
      - EXECUTOR=Local
    logging:
      options:
        max-size: 10m
        max-file: "3"
    volumes:
      - ./dags:/usr/local/airflow/dags
      - ./plugins:/usr/local/airflow/plugins
      - ./requirements.txt:/requirements.txt
    ports:
      - "8080"
    command: webserver
    # healthcheck:
    #   test: ["CMD-SHELL", "[ -f /usr/local/airflow/airflow-webserver.pid ]"]
    #   interval: 30s
    #   timeout: 30s
    #   retries: 3
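Since the webserver service uses image: my-airflow and the build: section is commented out, docker-compose never rebuilds the image itself. To rule out a stale image, a clean rebuild and recreate (assuming the build context is the project root) would be:

docker build --no-cache -t my-airflow .
docker-compose up --force-recreate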
entrypoint.sh:
#!/usr/bin/env bash
TRY_LOOP="20"
# Global defaults and back-compat
: "${AIRFLOW_HOME:="/usr/local/airflow"}"
: "${AIRFLOW__CORE__FERNET_KEY:=${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")}}"
: "${AIRFLOW__CORE__EXECUTOR:=${EXECUTOR:-Sequential}Executor}"
export \
AIRFLOW_HOME \
AIRFLOW__CORE__EXECUTOR \
AIRFLOW__CORE__FERNET_KEY
case "$1" in
  webserver)
    airflow initdb
    if [ "$AIRFLOW__CORE__EXECUTOR" = "LocalExecutor" ] || [ "$AIRFLOW__CORE__EXECUTOR" = "SequentialExecutor" ]; then
      # With the "Local" and "Sequential" executors it should all run in one container.
      airflow scheduler &
    fi
    exec airflow webserver
    ;;
  worker|scheduler)
    # Give the webserver time to run initdb.
    sleep 10
    exec airflow "$@"
    ;;
  flower)
    sleep 10
    exec airflow "$@"
    ;;
  version)
    exec airflow "$@"
    ;;
  *)
    # The command is something like bash, not an airflow subcommand. Just run it in the right environment.
    exec "$@"
    ;;
esac
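Because the final *) case simply execs whatever command is passed, it is also possible to open a shell through this same entrypoint (picking up the volumes and environment from docker-compose.yml) and inspect the installation from inside the container, for example:

docker-compose run --rm webserver bash -c "pip show typing_extensions; python -c 'import typing_extensions'"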