I am trying to install Airflow (1.10.0) with Ansible, following the installation guide linked below.
https://airflow.apache.org/installation.html
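The role's entry point is not shown in the question; for orientation, a tasks/main.yml that simply includes the listed task files in order is assumed, roughly like this sketch (file names as in the question, Ansible 2.4+ syntax, not taken from the actual repository):
# tasks/main.yml (assumed sketch)
---
- import_tasks: install-up.yml
- import_tasks: install.yml
- import_tasks: database.yml
- import_tasks: config.yml
- import_tasks: service.yml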
config.yml
---
- name: Airflow | Config | Ensure airflow directory structure
file:
path: "{{ item }}"
state: directory
owner: "{{ airflow_user }}"
group: "{{ airflow_group }}"
with_items:
- "{{ airflow_logs_folder }}"
- "{{ airflow_child_process_log_folder }}"
- "{{ airflow_dags_folder }}"
- "{{ airflow_plugins_folder }}"
- name: Airflow | Config | Copy gunicorn logrotate config
template:
src: gunicorn-logrotate.j2
dest: /etc/logrotate.d/airflow
owner: "{{ airflow_user }}"
group: "{{ airflow_group }}"
mode: 0644
become: yes
become_method: sudo
become_user: root
- name: Airflow | Config | Copy sample dag hello_world
copy:
src: "{{ airflow_home }}/cng-ansible/roles/airflow/files/cng-hello_world.py"
dest: "{{ airflow_dags_folder }}/cng-hello_world.py"
owner: "{{ airflow_user }}"
group: "{{ airflow_group }}"
mode: 0644
remote_src: True
- name: Airflow | Config | Synchronization of DAGs
synchronize:
src: "{{ airflow_home }}/cng-ansible/roles/airflow/files/"
dest: "{{ airflow_dags_folder }}"
- name: Airflow | Config | Install airflow environment file
template:
src: airflow-environment-file.j2
dest: "{{ airflow_environment_file_folder }}/airflow"
owner: "{{ airflow_user }}"
group: "{{ airflow_group }}"
mode: 0640
- name: Airflow | Config | Initialize Airflow Database
shell: "{{ airflow_executable }} initdb"
args:
chdir: "{{ airflow_home }}"
executable: /bin/bash
become: yes
become_method: sudo
become_user: "{{ airflow_user }}"
- name: Airflow | Config | Copy basic airflow config file
template:
src: airflow.cfg.j2
dest: "{{ airflow_home }}/airflow/airflow.cfg"
owner: "{{ airflow_user }}"
group: "{{ airflow_group }}"
mode: 0640
register: airflow_config
notify:
- restart airflow-webserver
- restart airflow-scheduler
- restart airflow-worker
- name: Airflow | Config | Install webserver systemd unit file
template:
src: airflow-webserver.service.j2
dest: /usr/lib/systemd/system/airflow-webserver.service
owner: "{{ airflow_user }}"
group: "{{ airflow_group }}"
mode: 0640
register: airflow_config
notify:
- restart airflow-webserver
- restart airflow-scheduler
- restart airflow-worker
- name: Airflow | Config | Install scheduler systemd unit file
template:
src: airflow-scheduler.service.j2
dest: /usr/lib/systemd/system/airflow-scheduler.service
owner: "{{ airflow_user }}"
group: "{{ airflow_group }}"
mode: 0640
register: airflow_config
notify:
- restart airflow-webserver
- restart airflow-scheduler
- restart airflow-worker
- name: Airflow | Config | Install worker systemd unit file
template:
src: airflow-worker.service.j2
dest: /usr/lib/systemd/system/airflow-worker.service
owner: "{{ airflow_user }}"
group: "{{ airflow_group }}"
mode: 0640
register: airflow_config
notify:
- restart airflow-webserver
- restart airflow-scheduler
- restart airflow-worker
- name: Airflow | Config | Copy extra airflow config files (provided by playbooks)
copy:
src: "{{ item }}"
dest: "{{ airflow_home }}/{{ item | basename }}"
owner: "{{ airflow_user }}"
group: "{{ airflow_group }}"
mode: 0640
with_fileglob:
- "{{ airflow_extra_conf_path }}/*"
notify:
- restart airflow-webserver
- restart airflow-scheduler
- restart airflow-worker
- name: Airflow | Config | Copy extra airflow config templates (provided by playbooks)
template:
src: "{{ item }}"
dest: "{{ airflow_home }}/{{ item | basename }}"
owner: "{{ airflow_user }}"
group: "{{ airflow_group }}"
mode: 0640
with_fileglob:
- "{{ airflow_extra_conf_template_path }}/*"
notify:
- restart airflow-webserver
- restart airflow-scheduler
- restart airflow-worker
- name: Airflow | Config | Add variables from configuration file
command: "{{ airflow_executable }} variables -s {{ item.key }} {{ item.value }}"
environment:
AIRFLOW_HOME: "{{ airflow_home }}"
become: true
become_user: "{{ airflow_user }}"
with_items: "{{ airflow_admin_variables }}"
tags:
skip_ansible_lint
- name: Airflow | Config | Add connections from configuration file
command: "{{ airflow_executable }} connections -a {% for key, value in item.iteritems() %}--{{ key }} '{{ value }}' {% endfor %}"
environment:
AIRFLOW_HOME: "{{ airflow_home }}"
become: true
become_user: "{{ airflow_user }}"
with_items: "{{ airflow_admin_connections }}"
tags:
skip_ansible_lint
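The notify entries above reference three handlers (restart airflow-webserver, restart airflow-scheduler, restart airflow-worker) that are not included in the question. A minimal handlers/main.yml that would satisfy them, assuming the systemd unit files installed by the tasks above, could look like this sketch:
# handlers/main.yml (assumed sketch, adjust to the actual role)
---
- name: restart airflow-webserver
  systemd:
    name: airflow-webserver
    state: restarted
    daemon_reload: yes
  become: yes

- name: restart airflow-scheduler
  systemd:
    name: airflow-scheduler
    state: restarted
    daemon_reload: yes
  become: yes

- name: restart airflow-worker
  systemd:
    name: airflow-worker
    state: restarted
    daemon_reload: yes
  become: yes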
service.yml
---
- name: Airflow | Services | Configuring service
systemd:
name: "{{ item.key }}"
state: "{{ item.value.state }}"
enabled: "{{ item.value.enabled }}"
daemon_reload: yes
become: yes
become_method: sudo
become_user: root
with_dict: "{{ airflow_services }}"
when: item.value.enabled
changed_when: false
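The with_dict loop above expects an airflow_services dictionary that is not shown in the question. A plausible definition (variable layout is my assumption; adjust the names to the actual role defaults) would be:
# defaults/main.yml (assumed sketch)
airflow_services:
  airflow-webserver:
    state: started
    enabled: yes
  airflow-scheduler:
    state: started
    enabled: yes
  airflow-worker:
    state: started
    enabled: yes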
database.yml
---
- name: Airflow | DB | Uninstall markupsafe
pip:
name: markupsafe
state: absent
- name: Airflow | DB | Install markupsafe
pip:
name: markupsafe
state: latest
- name: Airflow | DB | Set PostgreSQL environment variables
template:
src: postgres.sh.j2
dest: /etc/profile.d/postgres.sh
mode: 0644
notify: restart postgresql
- name: Airflow | DB | Ensure PostgreSQL data directory exists
file:
path: "{{ postgresql_data_dir }}"
owner: "{{ postgresql_user }}"
group: "{{ postgresql_group }}"
state: directory
mode: 0700
become: yes
become_method: sudo
become_user: root
register: airflow_dbsetup
notify:
- restart postgresql
- name: Airflow | DB | Check if PostgreSQL database is initialized
stat:
path: "{{ postgresql_data_dir }}/PG_VERSION"
register: file_exists
- name: Airflow | DB | Initialize PostgreSQL Database
command: "{{ airflow_executable_pgsql }} initdb"
when: not file_exists.stat.exists
become: yes
become_method: sudo
become_user: root
register: airflow_dbsetup
notify:
- restart postgresql
- name: Airflow | DB | Copy Postgresql hba file
copy:
src: ../templates/pg_hba.conf.j2
dest: "{{ postgresql_data_dir }}/pg_hba.conf"
owner: "{{ postgresql_user }}"
group: "{{ postgresql_group }}"
mode: 0600
become: yes
become_method: sudo
become_user: root
register: airflow_dbsetup
notify:
- restart postgresql
- name: Airflow | DB | Copy Postgresql config file
copy:
src: ../templates/postgresql.conf.j2
dest: "{{ postgresql_data_dir }}/postgresql.conf"
owner: "{{ postgresql_user }}"
group: "{{ postgresql_group }}"
mode: 0600
become: yes
become_method: sudo
become_user: root
register: airflow_dbsetup
notify:
- restart postgresql
- name: Airflow | DB | Restart PostgreSQL
shell: "systemctl restart postgresql"
become: yes
become_method: sudo
become_user: root
- name: Airflow | DB | Postgresql Create DB
postgresql_db:
name: airflow
- name: Airflow | DB | Postgresql User
postgresql_user:
db: airflow
name: airflow
password: airflow
priv: "ALL"
expires: infinity
become: yes
become_method: sudo
become_user: root
register: airflow_dbsetup
notify:
- restart postgresql
- name: Airflow | DB | Postgresql Privileges
postgresql_privs:
db: airflow
objs: ALL_DEFAULT
privs: ALL
type: default_privs
role: airflow
grant_option: yes
- name: Airflow | DB | Restart RabbitMQ-Server
shell: "systemctl restart rabbitmq-server"
become: yes
become_method: sudo
become_user: root
- name: Airflow | DB | RabbitMQ Add v_host
rabbitmq_vhost:
name: af-host
state: present
- name: Airflow | DB | RabbitMQ User
rabbitmq_user:
user: airflow
password: airflow
tags: airflow-user
vhost: af-host
configure_priv: .*
read_priv: .*
write_priv: .*
state: present
force: yes
become: yes
become_method: sudo
become_user: root
register: airflow_dbsetup
notify:
- restart rabbitmq-server
- name: Airflow | DB | Create MySQL DB
mysql_db:
name: airflow
state: present
- name: Airflow | DB | MySQL user
mysql_user:
name: airflow
password: airflow
priv: '*.*:ALL'
state: present
#- name: CREATE USER
#  shell: sudo -i -u postgres psql -c "CREATE USER airflow WITH PASSWORD 'airflow';"
#- name: CREATE DATABASE
#  shell: sudo -i -u postgres psql -c "CREATE DATABASE airflow;"
#- name: GRANT PRIVILEGES ON DATABASE
#  shell: sudo -i -u postgres psql -c "GRANT ALL PRIVILEGES ON DATABASE airflow TO airflow;"
#- name: GRANT PRIVILEGES ON TABLES
#  shell: sudo -i -u postgres psql -c "GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO airflow;"
install-up.yml
---
- name: Airflow | Install Pip | Check to see if pip is already installed
command: "pip --version"
ignore_errors: true
register: pip_is_installed
changed_when: false
- block:
- name: Download get-pip.py
get_url: url=https://bootstrap.pypa.io/get-pip.py dest=/tmp
- name: Install pip
command: "python /tmp/get-pip.py"
- name: Delete get-pip.py
file: state=absent path=/tmp/get-pip.py
when: pip_is_installed.rc != 0
install.yml
---
- name: Airflow | Install | Basic Packages
yum:
name: "{{ packages }}"
vars:
packages:
- gcc
- gcc-c++
- zlib-devel
- bzip2-devel
- openssl-devel
- ncurses-devel
- sqlite-devel
- cyrus-sasl-devel
- postgresql
- postgresql-server
- mariadb-server
- mariadb
- python-pip
- python-devel
- mysql-devel
- python-setuptools
- java-1.8.0-openjdk.x86_64
- MySQL-python
register: airflow_dbsetup
notify:
- restart postgresql
- restart rabbitmq-server
- restart mariadb
- name: Airflow | Install | Upgrade pip
shell: "pip install --upgrade pip"
- name: Airflow | Install | Upgrade setuptools
shell: "pip install --upgrade setuptools"
- name: Airflow | Install | Start mariadb
systemd: state=started name=mariadb daemon_reload=yes
become: yes
- name: Airflow | Install | Group dev
yum:
name: "@Development"
state: latest
- name: Airflow | Install | Numpy
pip:
name: numpy
state: latest
become: yes
- name: Airflow | Install | cython
pip:
name: cython
state: latest
become: yes
- name: Airflow | Install | With pip
pip:
name: apache-airflow
version: 1.10.0
- name: Airflow | Install | crypto
pip:
name: apache-airflow[crypto]
version: 1.10.0
register: airflow_install
- name: Airflow | Install | hive
pip:
name: apache-airflow[hive]
version: 1.10.0
register: airflow_install
- name: Airflow | Install | jdbc
pip:
name: apache-airflow[jdbc]
version: 1.10.0
register: airflow_install
- name: Airflow | Install | password
pip:
name: apache-airflow[password]
version: 1.10.0
register: airflow_install
- name: Airflow | Install | s3
pip:
name: apache-airflow[s3]
version: 1.10.0
register: airflow_install
- name: Airflow | Install | slack
pip:
name: apache-airflow[slack]
version: 1.10.0
register: airflow_install
- name: Airflow | Install | ssh
pip:
name: apache-airflow[ssh]
version: 1.10.0
register: airflow_install
- name: Airflow | Install | Downgrade pip
shell: "pip install --upgrade --force-reinstall pip==9.0.0"
- name: Airflow | Install | devel
pip:
name: apache-airflow[devel]
version: 1.10.0
register: airflow_install
- name: Airflow | Install | MSSql
pip:
name: apache-airflow[mssql]
version: 1.10.0
register: airflow_install
- name: Airflow | Install | MySQL-python
pip:
name: MySQL-python
- name: Airflow | Install | Celery
pip:
name: celery
- name: Airflow | Install | psycopg2
pip:
name: psycopg2
- name: Airflow | Install | psycopg2-binary
pip:
name: psycopg2-binary
- name: Airflow | Install | erlang
yum:
name: https://github.com/rabbitmq/erlang-rpm/releases/download/v20.1.7/erlang-20.1.7-1.el6.x86_64.rpm
state: present
- name: Airflow | Install | socat
yum:
name: socat
state: present
- name: Airflow | Install | Rabbitmq
yum:
name: https://dl.bintray.com/rabbitmq/all/rabbitmq-server/3.7.8/rabbitmq-server-3.7.8-1.el7.noarch.rpm
state: present
Airflow itself runs successfully, but when I run systemctl status airflow-webserver.service I get the following error.
[root@localhost ~]# systemctl status airflow-webserver.service
● airflow-webserver.service - Airflow webserver daemon
Loaded: loaded (/usr/lib/systemd/system/airflow-webserver.service; enabled; vendor preset: disabled)
Active: activating (auto-restart) (Result: exit-code) since Wed 2018-12-26 05:01:22 GMT; 9s ago
Process: 18838 ExecStart=/usr/bin/airflow webserver --pid /home/ec2-user/airflow/webserver.pid (code=exited, status=1/FAILURE)
Main PID: 18838 (code=exited, status=1/FAILURE)
CGroup: /system.slice/airflow-webserver.service
Dec 26 05:01:22 localhost.localdomain systemd[1]: airflow-webserver.service: main process exited, code=exited, status=1/FAILURE
Dec 26 05:01:22 localhost.localdomain systemd[1]: Unit airflow-webserver.service entered failed state.
Dec 26 05:01:22 localhost.localdomain systemd[1]: airflow-webserver.service failed.
airflow.cfg
[root@localhost airflow]# cat airflow.cfg
[core]
airflow_home = /root/airflow
dags_folder = /root/airflow/dags
base_log_folder = /root/airflow/logs
remote_logging = False
remote_log_conn_id =
remote_base_log_folder =
encrypt_s3_logs = False
logging_level = INFO
fab_logging_level = WARN
logging_config_class =
log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s
simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
log_filename_template = {{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log
log_processor_filename_template = {{ filename }}.log
hostname_callable = socket:getfqdn
default_timezone = utc
executor = SequentialExecutor
sql_alchemy_conn = sqlite:////root/airflow/airflow.db
sql_alchemy_pool_enabled = True
sql_alchemy_pool_size = 5
sql_alchemy_pool_recycle = 1800
sql_alchemy_reconnect_timeout = 300
parallelism = 32
dag_concurrency = 16
dags_are_paused_at_creation = True
non_pooled_task_slot_count = 128
max_active_runs_per_dag = 16
load_examples = True
plugins_folder = /root/airflow/plugins
fernet_key = _eooBh6cIC0cBxvNyvwc3n8kjHNniGAPJXWkBK8n6rI=
donot_pickle = False
dagbag_import_timeout = 30
task_runner = BashTaskRunner
default_impersonation =
security =
secure_mode = False
unit_test_mode = False
task_log_reader = task
enable_xcom_pickling = True
killed_task_cleanup_time = 60
dag_run_conf_overrides_params = False
[cli]
api_client = airflow.api.client.local_client
endpoint_url = http://localhost:8080
[api]
auth_backend = airflow.api.auth.backend.default
[lineage]
backend =
[atlas]
sasl_enabled = False
host =
port = 21000
username =
password =
[operators]
# The default owner assigned to each new operator, unless
# provided explicitly or passed via `default_args`
default_owner = Airflow
default_cpus = 1
default_ram = 512
default_disk = 512
default_gpus = 0
[hive]
# Default mapreduce queue for HiveOperator tasks
default_hive_mapred_queue =
[webserver]
# The base url of your website as airflow cannot guess what domain or
# cname you are using. This is used in automated emails that
# airflow sends to point links to the right web server
base_url = http://localhost:8080
# The ip specified when starting the web server
web_server_host = 0.0.0.0
# The port on which to run the web server
web_server_port = 8080
web_server_ssl_cert =
web_server_ssl_key =
web_server_master_timeout = 120
web_server_worker_timeout = 120
worker_refresh_batch_size = 1
worker_refresh_interval = 30
secret_key = temporary_key
workers = 4
worker_class = sync
access_logfile = -
error_logfile = -
expose_config = False
authenticate = False
filter_by_owner = False
owner_mode = user
dag_default_view = tree
dag_orientation = LR
demo_mode = False
log_fetch_timeout_sec = 5
hide_paused_dags_by_default = False
page_size = 100
rbac = False
navbar_color = #007A87
default_dag_run_display_number = 25
[email]
email_backend = airflow.utils.email.send_email_smtp
[smtp]
smtp_host = localhost
smtp_starttls = True
smtp_ssl = False
smtp_port = 25
smtp_mail_from = airflow@example.com
[celery]
celery_app_name = airflow.executors.celery_executor
worker_concurrency = 16
worker_log_server_port = 8793
broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow
result_backend = db+mysql://airflow:airflow@localhost:3306/airflow
# Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start
# it `airflow flower`. This defines the IP that Celery Flower runs on
flower_host = 0.0.0.0
# The root URL for Flower
# Ex: flower_url_prefix = /flower
flower_url_prefix =
# This defines the port that Celery Flower runs on
flower_port = 5555
# Default queue that tasks get assigned to and that worker listen on.
default_queue = default
# Import path for celery configuration options
celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG
# In case of using SSL
ssl_active = False
ssl_key =
ssl_cert =
ssl_cacert =
[celery_broker_transport_options]
[dask]
cluster_address = 127.0.0.1:8786
tls_ca =
tls_cert =
tls_key =
[scheduler]
job_heartbeat_sec = 5
scheduler_heartbeat_sec = 5
run_duration = -1
min_file_process_interval = 0
min_file_parsing_loop_time = 1
dag_dir_list_interval = 300
print_stats_interval = 30
child_process_log_directory = /root/airflow/logs/scheduler
scheduler_zombie_task_threshold = 300
catchup_by_default = True
max_tis_per_query = 512
statsd_on = False
statsd_host = localhost
statsd_port = 8125
statsd_prefix = airflow
max_threads = 2
authenticate = False
[ldap]
uri =
user_filter = objectClass=*
user_name_attr = uid
group_member_attr = memberOf
superuser_filter =
data_profiler_filter =
bind_user = cn=Manager,dc=example,dc=com
bind_password = insecure
basedn = dc=example,dc=com
cacert = /etc/ca/ldap_ca.crt
search_scope = LEVEL
[mesos]
master = localhost:5050
framework_name = Airflow
task_cpu = 1
task_memory = 256
checkpoint = False
authenticate = False
[kerberos]
ccache = /tmp/airflow_krb5_ccache
# gets augmented with fqdn
principal = airflow
reinit_frequency = 3600
kinit_path = kinit
keytab = airflow.keytab
[github_enterprise]
api_rev = v3
[admin]
hide_sensitive_variable_fields = True
[elasticsearch]
elasticsearch_host =
elasticsearch_log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number}
elasticsearch_end_of_log_mark = end_of_log
[kubernetes]
worker_container_repository =
worker_container_tag =
delete_worker_pods = True
namespace = default
airflow_configmap =
dags_volume_subpath =
dags_volume_claim =
logs_volume_subpath =
logs_volume_claim =
git_repo =
git_branch =
git_user =
git_password =
git_subpath =
git_sync_container_repository = gcr.io/google-containers/git-sync-amd64
git_sync_container_tag = v2.0.5
git_sync_init_container_name = git-sync-clone
worker_service_account_name =
image_pull_secrets =
gcp_service_account_keys =
in_cluster = True
[kubernetes_secrets]
Answer 0 (score: 1)
Make sure the dependencies of cryptography are present on your system:
Debian or Ubuntu derived distributions
apt-get install build-essential libssl-dev libffi-dev python-dev followed by
pip install cryptography
Red Hat derived distributions
yum install gcc openssl-devel libffi-devel python-devel followed by
pip install cryptography
I suggest you try this manually first, and then automate it with Ansible, for example with tasks like the sketch below.
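For the Red Hat case, an Ansible version of those two commands could be sketched as follows (the package list is taken from the commands above; task names and placement are my assumptions to adapt to your role):
---
- name: Install build dependencies for cryptography
  yum:
    name:
      - gcc
      - openssl-devel
      - libffi-devel
      - python-devel
    state: present
  become: yes

- name: Install cryptography via pip
  pip:
    name: cryptography
  become: yes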