我想从docker容器内部启动JuPyter笔记本,该容器是根据映像构建的,如下所示:
FROM debian:stretch
# Never prompts the user for choices on installation/configuration of packages
ENV DEBIAN_FRONTEND noninteractive
ENV TERM linux
# Airflow
ARG AIRFLOW_VERSION=1.10.1
ENV AIRFLOW_HOME=/usr/local/airflow
ENV EMBEDDED_DAGS_LOCATION=./dags
ENV EMBEDDED_PLUGINS_LOCATION=./plugins
ENV SLUGIFY_USES_TEXT_UNIDECODE=yes
ENV PYTHONPATH=${PYTHONPATH}:${AIRFLOW_HOME}/athena-py
# Define en_US.
ENV LANGUAGE en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LC_ALL en_US.UTF-8
ENV LC_CTYPE en_US.UTF-8
ENV LC_MESSAGES en_US.UTF-8
ENV LC_ALL en_US.UTF-8
WORKDIR /requirements
# Only copy needed files
COPY ./requirements/airflow.txt /requirements/airflow.txt
RUN set -ex \
&& buildDeps=' \
build-essential \
libblas-dev \
libffi-dev \
libkrb5-dev \
liblapack-dev \
libpq-dev \
libxml2-dev \
libxslt1-dev \
python3-pip \
zlib1g-dev \
libcurl4-gnutls-dev \
libssh2-1-dev \
libldap2-dev \
' \
&& apt-get update -yqq \
&& apt-get install -yqq --no-install-recommends \
$buildDeps \
apt-utils \
curl \
git \
locales \
netcat \
gcc \
python3-dev \
libssl-dev \
libsasl2-dev \
openssh-server \
libsasl2-modules \
\
&& sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen \
&& locale-gen \
&& update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \
&& useradd -ms /bin/bash -d ${AIRFLOW_HOME} -u 1002 airflow \
&& pip3 install --upgrade pip==9.0.3 'setuptools!=36.0.0' \
&& if [ ! -e /usr/bin/pip ]; then ln -s /usr/bin/pip3 /usr/bin/pip ; fi \
&& if [ ! -e /usr/bin/python ]; then ln -sf /usr/bin/python3 /usr/bin/python; fi \
&& pip3 install -r /requirements/airflow.txt \
&& apt-get remove --purge -yqq $buildDeps libpq-dev \
&& apt-get clean \
&& rm -rf \
/var/lib/apt/lists/* \
/tmp/* \
/var/tmp/* \
/usr/share/man \
/usr/share/doc \
/usr/share/doc-base
# Install env key
RUN curl -s https://raw.githubusercontent.com/envkey/envkey-source/master/install.sh | bash
# install athena HEAD
WORKDIR ${AIRFLOW_HOME}
COPY ./some-dir/requirements.txt some-dir/requirements.txt
RUN pip3 install -r some-dir/requirements.txt
COPY ./some-dir/ some-dir
COPY ./tests/ tests
COPY script/entrypoint.sh entrypoint.sh
COPY script/setup_connections.py setup_connections.py
COPY config/airflow.cfg airflow.cfg
COPY ${EMBEDDED_PLUGINS_LOCATION} plugins
COPY ${EMBEDDED_DAGS_LOCATION} dags
# Python3 Kernel for JuPyter notebooks
RUN python3 -m pip install ipykernel
RUN python3 -m ipykernel install --user
RUN mkdir -p /usr/local/airflow/.ipython/profile_default/startup/
RUN echo "import pandas as pd" > /usr/local/airflow/.ipython/profile_default/startup/athena.py
RUN python3 -m pip install --upgrade pip
RUN python3 -m pip install jupyter
RUN chown -R airflow ${AIRFLOW_HOME} \
&& chmod +x entrypoint.sh
EXPOSE 8080 5555 8793 8280 8888
USER airflow
ENTRYPOINT ["./entrypoint.sh"]
请注意我的# start jupyter notebook
"jupyter-notebook", "--ip=0.0.0.0", "--allow-root"
中的entrypoint.sh
。
entrypoint.sh是:
#!/usr/bin/env bash
echo "Setting up env vars..."
eval $(envkey-source)
echo "----------------------------------------------------------------------"
CMD="airflow"
TRY_LOOP="${TRY_LOOP:-10}"
POSTGRES_HOST="${POSTGRES_HOST:-postgres}"
POSTGRES_PORT=5432
POSTGRES_CREDS="${POSTGRES_CREDS:-airflow:airflow}"
RABBITMQ_HOST="${RABBITMQ_HOST:-rabbitmq}"
RABBITMQ_CREDS="${RABBITMQ_CREDS:-airflow:airflow}"
RABBITMQ_MANAGEMENT_PORT=15672
FLOWER_URL_PREFIX="${FLOWER_URL_PREFIX:-/flower}"
AIRFLOW_URL_PREFIX="${AIRFLOW_URL_PREFIX:-/airflow}"
LOAD_DAGS_EXAMPLES="${LOAD_DAGS_EXAMPLES:-false}"
REST_API_KEY="${REST_API_KEY:-airflow_api_key}"
if [ -z $FERNET_KEY ]; then
FERNET_KEY=$(python3 -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")
fi
echo "Postgres host: $POSTGRES_HOST"
echo "RabbitMQ host: $RABBITMQ_HOST"
echo "Load DAG examples: $LOAD_DAGS_EXAMPLES"
echo $1
# Generate Fernet key
sed -i "s/{{ REST_API_KEY }}/${REST_API_KEY}/" $AIRFLOW_HOME/airflow.cfg
sed -i "s/{{ FERNET_KEY }}/${FERNET_KEY}/" $AIRFLOW_HOME/airflow.cfg
sed -i "s/{{ POSTGRES_HOST }}/${POSTGRES_HOST}/" $AIRFLOW_HOME/airflow.cfg
sed -i "s/{{ POSTGRES_CREDS }}/${POSTGRES_CREDS}/" $AIRFLOW_HOME/airflow.cfg
sed -i "s/{{ RABBITMQ_HOST }}/${RABBITMQ_HOST}/" $AIRFLOW_HOME/airflow.cfg
sed -i "s/{{ RABBITMQ_CREDS }}/${RABBITMQ_CREDS}/" $AIRFLOW_HOME/airflow.cfg
sed -i "s/{{ LOAD_DAGS_EXAMPLES }}/${LOAD_DAGS_EXAMPLES}/" $AIRFLOW_HOME/airflow.cfg
sed -i "s#{{ FLOWER_URL_PREFIX }}#${FLOWER_URL_PREFIX}#" $AIRFLOW_HOME/airflow.cfg
sed -i "s#{{ AIRFLOW_URL_PREFIX }}#${AIRFLOW_URL_PREFIX}#" $AIRFLOW_HOME/airflow.cfg
# wait for rabbitmq
if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] || [ "$1" = "flower" ] ; then
j=0
while ! curl -sI -u $RABBITMQ_CREDS http://$RABBITMQ_HOST:$RABBITMQ_MANAGEMENT_PORT/api/whoami |grep '200 OK'; do
j=`expr $j + 1`
if [ $j -ge $TRY_LOOP ]; then
echo "$(date) - $RABBITMQ_HOST still not reachable, giving up"
exit 1
fi
echo "$(date) - waiting for RabbitMQ... $j/$TRY_LOOP"
sleep 5
done
fi
# wait for postgres
if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] || [ "$1" = "test" ] ; then
i=0
while ! nc -z $POSTGRES_HOST $POSTGRES_PORT; do
i=`expr $i + 1`
if [ $i -ge $TRY_LOOP ]; then
echo "$(date) - ${POSTGRES_HOST}:${POSTGRES_PORT} still not reachable, giving up"
exit 1
fi
echo "$(date) - waiting for ${POSTGRES_HOST}:${POSTGRES_PORT}... $i/$TRY_LOOP"
sleep 5
done
# TODO: move to a Helm hook
# https://github.com/kubernetes/helm/blob/master/docs/charts_hooks.md
if [ "$1" = "webserver" ]; then
echo "----------------------------------------------------------------------"
echo "----------------------------------------------------------------------"
echo "Initialize database..."
echo "----------------------------------------------------------------------"
echo "----------------------------------------------------------------------"
$CMD initdb
echo "----------------------------------------------------------------------"
echo "----------------------------------------------------------------------"
echo "setting up connections..."
echo "----------------------------------------------------------------------"
echo "----------------------------------------------------------------------"
python setup_connections.py
echo "----------------------------------------------------------------------"
echo "----------------------------------------------------------------------"
fi
if [ "$1" = "test" ]; then
echo "----------------------------------------------------------------------"
echo "----------------------------------------------------------------------"
echo "Initialize database..."
echo "----------------------------------------------------------------------"
echo "----------------------------------------------------------------------"
$CMD initdb
echo "----------------------------------------------------------------------"
echo "----------------------------------------------------------------------"
echo "setting up connections..."
echo "----------------------------------------------------------------------"
echo "----------------------------------------------------------------------"
python setup_connections.py
echo "----------------------------------------------------------------------"
echo "----------------------------------------------------------------------"
echo "Running tests..."
echo "----------------------------------------------------------------------"
echo "----------------------------------------------------------------------"
python -m unittest discover
echo "----------------------------------------------------------------------"
echo "----------------------------------------------------------------------"
echo "exiting.."
exit 1
fi
fi
# start jupyter notebook
"jupyter-notebook", "--ip=0.0.0.0", "--allow-root"
$CMD "$@"
我尝试阅读某些文章和答案,他们陈述了如何使用port和ip启动jupyter笔记本的方法,有人陈述了编写docker-compose的方法,所有陈述的方法都是徒劳的。
我希望将jupyter笔记本用作localhost:8888
,就像我通过localhost:8080
访问气流Web服务器一样。但是,现在,我遇到错误:连接被拒绝