Airflow installation | Ansible

Date: 2018-12-27 10:46:18

Tags: sqlalchemy cryptography ansible airflow

Following the link below, I tried to install Airflow (1.10.0) using Ansible.

https://airflow.apache.org/installation.html
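
My role pulls the task files listed below in from a main.yml, roughly like this (a sketch from memory; the import style and exact order may differ slightly):

---
# roles/airflow/tasks/main.yml (sketch; file names match the headings below)
- import_tasks: install-up.yml   # bootstrap pip
- import_tasks: install.yml      # OS packages, pip packages, airflow extras
- import_tasks: database.yml     # PostgreSQL / MySQL / RabbitMQ setup
- import_tasks: config.yml       # airflow.cfg, DAGs, systemd unit files
- import_tasks: service.yml      # enable and start the services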

config.yml

---

- name: Airflow | Config | Ensure airflow directories structure
  file:
    path: "{{ item }}"
    state: directory
    owner: "{{ airflow_user }}"
    group: "{{ airflow_group }}"
  with_items:
    - "{{ airflow_logs_folder }}"
    - "{{ airflow_child_process_log_folder }}"
    - "{{ airflow_dags_folder }}"
    - "{{ airflow_plugins_folder }}"

- name: Airflow | Config | Copy gunicorn logrotate config
  template:
    src: gunicorn-logrotate.j2
    dest: /etc/logrotate.d/airflow
    owner: "{{ airflow_user }}"
    group: "{{ airflow_group }}"
    mode: 0644
  become: yes
  become_method: sudo
  become_user: root

- name: Airflow | Config | Copy sample dag hello_world
  copy:
    src: "{{ airflow_home }}/cng-ansible/roles/airflow/files/cng-hello_world.py"
    dest: "{{ airflow_dags_folder }}/cng-hello_world.py"
    owner: "{{ airflow_user }}"
    group: "{{ airflow_group }}"
    mode: 0644
    remote_src: True

- name: Airflow | Config | Synchronization of DAGs
  synchronize:
    src: "{{ airflow_home }}/cng-ansible/roles/airflow/files/"
    dest: "{{ airflow_dags_folder }}"

- name: Airflow | Config | Install airflow environment file
  template:
    src: airflow-environment-file.j2
    dest: "{{ airflow_environment_file_folder }}/airflow"
    owner: "{{ airflow_user }}"
    group: "{{ airflow_group }}"
    mode: 0640

- name: Airflow | Config | Initialize Airflow Database
  shell: "{{ airflow_executable }} initdb"
  args:
    chdir: "{{ airflow_home }}"
    executable: /bin/bash
  become: yes
  become_method: sudo
  become_user: "{{ airflow_user }}"

- name: Airflow | Config | Copy basic airflow config file
  template:
    src: airflow.cfg.j2
    dest: "{{ airflow_home }}/airflow/airflow.cfg"
    owner: "{{ airflow_user }}"
    group: "{{ airflow_group }}"
    mode: 0640
  register: airflow_config
  notify:
    - restart airflow-webserver
    - restart airflow-scheduler
    - restart airflow-worker

- name: Airflow | Config | Install webserver systemd unit file
  template:
    src: airflow-webserver.service.j2
    dest: /usr/lib/systemd/system/airflow-webserver.service
    owner: "{{ airflow_user }}"
    group: "{{ airflow_group }}"
    mode: 0640
  register: airflow_config
  notify:
    - restart airflow-webserver
    - restart airflow-scheduler
    - restart airflow-worker

- name: Airflow | Config | Install scheduler systemd unit file
  template:
    src: airflow-scheduler.service.j2
    dest: /usr/lib/systemd/system/airflow-scheduler.service
    owner: "{{ airflow_user }}"
    group: "{{ airflow_group }}"
    mode: 0640
  register: airflow_config
  notify:
    - restart airflow-webserver
    - restart airflow-scheduler
    - restart airflow-worker

- name: Airflow | Config | Install worker systemd unit file
  template:
    src: airflow-worker.service.j2
    dest: /usr/lib/systemd/system/airflow-worker.service
    owner: "{{ airflow_user }}"
    group: "{{ airflow_group }}"
    mode: 0640
  register: airflow_config
  notify:
    - restart airflow-webserver
    - restart airflow-scheduler
    - restart airflow-worker

- name: Airflow | Config | Copy extra airflow config files (provided by playbooks)
  copy:
    src: "{{ item }}"
    dest: "{{ airflow_home }}/{{ item | basename }}"
    owner: "{{ airflow_user }}"
    group: "{{ airflow_group }}"
    mode: 0640
  with_fileglob:
    - "{{ airflow_extra_conf_path }}/*"
  notify:
    - restart airflow-webserver
    - restart airflow-scheduler
    - restart airflow-worker

- name: Airflow | Config | Copy extra airflow config templates (provided by playbooks)
  template:
    src: "{{ item }}"
    dest: "{{ airflow_home }}/{{ item | basename }}"
    owner: "{{ airflow_user }}"
    group: "{{ airflow_group }}"
    mode: 0640
  with_fileglob:
    - "{{ airflow_extra_conf_template_path }}/*"
  notify:
    - restart airflow-webserver
    - restart airflow-scheduler
    - restart airflow-worker

- name: Airflow | Config | Add variables from configuration file
  command: "{{ airflow_executable }} variables -s {{ item.key }} {{ item.value }}"
  environment:
    AIRFLOW_HOME: "{{ airflow_home }}"
  become: true
  become_user: "{{ airflow_user }}"
  with_items: "{{ airflow_admin_variables }}"
  tags:
    skip_ansible_lint

- name: Airflow | Config | Add connections from configuration file
  command: "{{ airflow_executable }} connections -a {% for key, value in item.iteritems() %}--{{ key }} '{{ value }}' {% endfor %}"
  environment:
    AIRFLOW_HOME: "{{ airflow_home }}"
  become: true
  become_user: "{{ airflow_user }}"
  with_items: "{{ airflow_admin_connections }}"
  tags:
    skip_ansible_lint
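
The notify entries above point at handlers that are not shown here; the handlers file is assumed to look roughly like this (a sketch, with service names taken from the notify entries):

---
# roles/airflow/handlers/main.yml (assumed; restart postgresql / rabbitmq-server /
# mariadb handlers referenced elsewhere follow the same pattern)
- name: restart airflow-webserver
  systemd:
    name: airflow-webserver
    state: restarted
    daemon_reload: yes

- name: restart airflow-scheduler
  systemd:
    name: airflow-scheduler
    state: restarted

- name: restart airflow-worker
  systemd:
    name: airflow-worker
    state: restarted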

service.yml

---
- name: Airflow | Services | Configuring service
  systemd:
    name: "{{ item.key }}"
    state: "{{ item.value.state }}"
    enabled: "{{ item.value.enabled }}"
    daemon_reload: yes
  become: yes
  become_method: sudo
  become_user: root
  with_dict: "{{ airflow_services }}"
  when: "{{ item.value.enabled }}"
  changed_when: false
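
airflow_services is a dict defined in the role defaults (not shown here); it is assumed to have roughly this shape:

# Assumed shape of airflow_services in defaults/main.yml (not shown above)
airflow_services:
  airflow-webserver:
    enabled: yes
    state: started
  airflow-scheduler:
    enabled: yes
    state: started
  airflow-worker:
    enabled: yes
    state: started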

database.yml

---
- name: Airflow | DB | Uninstall markupsafe
  pip:
    name: markupsafe
    state: absent

- name: Airflow | DB | Install markupsafe
  pip:
    name: markupsafe
    state: latest

- name: Airflow | DB | Set PostgreSQL environment variables
  template:
    src: postgres.sh.j2
    dest: /etc/profile.d/postgres.sh
    mode: 0644
  notify: restart postgresql

- name: Airflow | DB | Ensure PostgreSQL data directory exists
  file:
    path: "{{ postgresql_data_dir }}"
    owner: "{{ postgresql_user }}"
    group: "{{ postgresql_group }}"
    state: directory
    mode: 0700
  become: yes
  become_method: sudo
  become_user: root
  register: airflow_dbsetup
  notify:
    - restart postgresql

- name: Airflow | DB | Check if PostgreSQL database is initialized
  stat:
    path: "{{ postgresql_data_dir }}/PG_VERSION"
  register: file_exists

- name: Airflow | DB | Initialize PostgreSQL Database
  command: "{{ airflow_executable_pgsql }} initdb"
  when: not file_exists.stat.exists
  become: yes
  become_method: sudo
  become_user: root
  register: airflow_dbsetup
  notify:
    - restart postgresql

- name: Airflow | DB | Copy Postgresql hba file
  copy:
    src: ../templates/pg_hba.conf.j2
    dest: "{{ postgresql_data_dir }}/pg_hba.conf"
    owner: "{{ postgresql_user }}"
    group: "{{ postgresql_group }}"
    mode: 0600
  become: yes
  become_method: sudo
  become_user: root
  register: airflow_dbsetup
  notify:
    - restart postgresql

- name: Airflow | DB | Copy Postgresql config file
  copy:
    src: ../templates/postgresql.conf.j2
    dest: "{{ postgresql_data_dir }}/postgresql.conf.j2"
    owner: "{{ postgresql_user }}"
    group: "{{ postgresql_group }}"
    mode: 0600
  become: yes
  become_method: sudo
  become_user: root
  register: airflow_dbsetup
  notify:
    - restart postgresql

- name: Airflow | DB | Restart PostgreSQL
  shell: "systemctl restart postgresql"
  become: yes
  become_method: sudo
  become_user: root

- name: Airflow | DB | Postgresql Create DB
  postgresql_db:
    name: airflow

- name: Airflow | DB | Postgresql User
  postgresql_user:
    db: airflow
    name: airflow
    password: airflow
    priv: "ALL"
    expires: infinity
  become: yes
  become_method: sudo
  become_user: root
  register: airflow_dbsetup
  notify:
    - restart postgresql

- name: Airflow | DB | Postgresql Privileges
  postgresql_privs:
    db: airflow
    objs: ALL_DEFAULT
    privs: ALL
    type: default_privs
    role: airflow
    grant_option: yes

- name: Airflow | DB | Restart RabbitMQ-Server
  shell: "systemctl restart rabbitmq-server"
  become: yes
  become_method: sudo
  become_user: root

- name: Airflow | DB | RabbitMQ Add v_host
  rabbitmq_vhost:
    name: af-host
    state: present

- name: Airflow | DB |  RabbitMQ User
  rabbitmq_user:
    user: airflow
    password: airflow
    tags: airflow-user
    vhost: af-host
    configure_priv: .*
    read_priv: .*
    write_priv: .*
    state: present
    force: yes
  become: yes
  become_method: sudo
  become_user: root
  register: airflow_dbsetup
  notify:
    - restart rabbitmq-server

- name: Airflow | DB | Create MySQL DB
  mysql_db:
    name: airflow
    state: present

- name: Airflow | DB | MySQL user
  mysql_user:
    name: airflow
    password: airflow
    priv: '*.*:ALL'
    state: present

#- name: CREATE USER
#  shell: "sudo -i -u postgres psql -c "CREATE USER airflow WITH PASSWORD 'airflow';""

#- name: CREATE DATABASE
#  shell: "sudo -i -u postgres psql -c "CREATE DATABASE airflow;""

#- name: GRANT PRIVILEGES ON DATABASE
#  shell: "sudo -i -u postgres psql -c "GRANT ALL PRIVILEGES ON DATABASE airflow TO airflow;""

#- name: GRANT PRIVILEGES ON TABLES
#  shell: "sudo -i -u postgres psql -c "GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO airflow;""

install-up.yml

- name: Airflow | Install Pip | Check to see if pip is already installed
  command: "pip --version"
  ignore_errors: true
  register: pip_is_installed
  changed_when: false

- block:

    - name: Download get-pip.py
      get_url: url=https://bootstrap.pypa.io/get-pip.py  dest=/tmp

    - name: Install pip
      command: "python /tmp/get-pip.py"

    - name: Delete get-pip.py
      file: state=absent path=/tmp/get-pip.py

  when: pip_is_installed.rc != 0

install.yml

---
- name: Airflow | Install | Basic Packages
  yum:
     name: "{{ packages }}"
  vars:
    packages:
    - gcc
    - gcc-c++
    - zlib-devel
    - bzip2-devel
    - openssl-devel
    - ncurses-devel
    - sqlite-devel
    - cyrus-sasl-devel
    - postgresql
    - postgresql-server
    - mariadb-server
    - mariadb
    - python-pip
    - python-devel
    - mysql-devel
    - python-setuptools
    - java-1.8.0-openjdk.x86_64
    - MySQL-python
  register: airflow_dbsetup
  notify:
      - restart postgresql
      - restart rabbitmq-server
      - restart mariadb

- name: Airflow | Install | Upgrade pip
  shell: "pip install --upgrade pip"

- name: Airflow | Install | Upgrade setuptools
  shell: "pip install --upgrade setuptools"

- name: Airflow | Install | Start mariadb
  systemd: state=started name=mariadb daemon_reload=yes
  sudo: yes

- name: Airflow | Install | Group dev
  yum:
      name: "@Development"
      state: latest

- name: Airflow | Install | Numpy
  pip:
     name: numpy
     state: latest
  sudo: yes

- name: Airflow | Install | cython
  pip:
     name: cython
     state: latest
  sudo: yes

- name: Airflow | Install | With pip
  pip:
     name: apache-airflow
     version: 1.10.0

- name: Airflow | Install | crypto
  pip:
     name: apache-airflow[crypto]
     version: 1.10.0
  register: airflow_install

- name: Airflow | Install | hive
  pip:
     name: apache-airflow[hive]
     version: 1.10.0
  register: airflow_install

- name: Airflow | Install | jdbc
  pip:
     name: apache-airflow[jdbc]
     version: 1.10.0
  register: airflow_install

- name: Airflow | Install | password
  pip:
     name: apache-airflow[password]
     version: 1.10.0
  register: airflow_install

- name: Airflow | Install | s3
  pip:
     name: apache-airflow[s3]
     version: 1.10.0
  register: airflow_install

- name: Airflow | Install | slack
  pip:
     name: apache-airflow[slack]
     version: 1.10.0
  register: airflow_install

- name: Airflow | Install | ssh
  pip:
     name: apache-airflow[ssh]
     version: 1.10.0
  register: airflow_install

- name: Airflow | Install | Downgrade pip
  shell: "pip install --upgrade --force-reinstall pip==9.0.0"

- name: Airflow | Install | devel
  pip:
     name: apache-airflow[devel]
     version: 1.10.0
  register: airflow_install

- name: Airflow | Install | MSSql
  pip:
     name: apache-airflow[mssql]
     version: 1.10.0
  register: airflow_install

- name: Airflow | Install | MySQL-python
  pip:
     name: MySQL-python

- name: Airflow | Install | Celery
  pip:
     name: celery

- name: Airflow | Install | psycopg2
  pip:
     name: psycopg2

- name: Airflow | Install | psycopg2-binary
  pip:
     name: psycopg2-binary

- name: Airflow | Install | erlang
  yum:
      name: https://github.com/rabbitmq/erlang-rpm/releases/download/v20.1.7/erlang-20.1.7-1.el6.x86_64.rpm
      state: present

- name: Airflow | Install | socat
  yum:
     name: socat
     state: present

- name: Airflow | Install | Rabbitmq
  yum:
      name: https://dl.bintray.com/rabbitmq/all/rabbitmq-server/3.7.8/rabbitmq-server-3.7.8-1.el7.noarch.rpm
      state: present

Airflow installs successfully, but when I run systemctl status airflow-webserver.service I get the following error.

[root@localhost ~]# systemctl status airflow-webserver.service
● airflow-webserver.service - Airflow webserver daemon
   Loaded: loaded (/usr/lib/systemd/system/airflow-webserver.service; enabled; vendor preset: disabled)
   Active: activating (auto-restart) (Result: exit-code) since Wed 2018-12-26 05:01:22 GMT; 9s ago
  Process: 18838 ExecStart=/usr/bin/airflow webserver --pid /home/ec2-user/airflow/webserver.pid (code=exited, status=1/FAILURE)
 Main PID: 18838 (code=exited, status=1/FAILURE)
   CGroup: /system.slice/airflow-webserver.service

Dec 26 05:01:22 localhost.localdomain systemd[1]: airflow-webserver.service: main process exited, code=exited, status=1/FAILURE
Dec 26 05:01:22 localhost.localdomain systemd[1]: Unit airflow-webserver.service entered failed state.
Dec 26 05:01:22 localhost.localdomain systemd[1]: airflow-webserver.service failed.

airflow.cfg

[root@localhost airflow]# cat airflow.cfg 
[core]
airflow_home = /root/airflow
dags_folder = /root/airflow/dags
base_log_folder = /root/airflow/logs

remote_logging = False
remote_log_conn_id =
remote_base_log_folder =
encrypt_s3_logs = False
logging_level = INFO
fab_logging_level = WARN
logging_config_class =

log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s
simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
log_filename_template = {{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log
log_processor_filename_template = {{ filename }}.log
hostname_callable = socket:getfqdn

default_timezone = utc

executor = SequentialExecutor
sql_alchemy_conn = sqlite:////root/airflow/airflow.db
sql_alchemy_pool_enabled = True

sql_alchemy_pool_size = 5

sql_alchemy_pool_recycle = 1800
sql_alchemy_reconnect_timeout = 300

parallelism = 32

dag_concurrency = 16

dags_are_paused_at_creation = True

non_pooled_task_slot_count = 128

max_active_runs_per_dag = 16

load_examples = True

plugins_folder = /root/airflow/plugins

fernet_key = _eooBh6cIC0cBxvNyvwc3n8kjHNniGAPJXWkBK8n6rI=

donot_pickle = False

dagbag_import_timeout = 30

task_runner = BashTaskRunner

default_impersonation =

security =

secure_mode = False

unit_test_mode = False

task_log_reader = task

enable_xcom_pickling = True

killed_task_cleanup_time = 60

dag_run_conf_overrides_params = False

[cli]
api_client = airflow.api.client.local_client

endpoint_url = http://localhost:8080

[api]
auth_backend = airflow.api.auth.backend.default

[lineage]
backend =

[atlas]
sasl_enabled = False
host =
port = 21000
username =
password =

[operators]
# The default owner assigned to each new operator, unless
# provided explicitly or passed via `default_args`
default_owner = Airflow
default_cpus = 1
default_ram = 512
default_disk = 512
default_gpus = 0

[hive]
# Default mapreduce queue for HiveOperator tasks
default_hive_mapred_queue =

[webserver]
# The base url of your website as airflow cannot guess what domain or
# cname you are using. This is used in automated emails that
# airflow sends to point links to the right web server
base_url = http://localhost:8080

# The ip specified when starting the web server
web_server_host = 0.0.0.0

# The port on which to run the web server
web_server_port = 8080

web_server_ssl_cert =
web_server_ssl_key =

web_server_master_timeout = 120

web_server_worker_timeout = 120

worker_refresh_batch_size = 1

worker_refresh_interval = 30

secret_key = temporary_key

workers = 4

worker_class = sync

access_logfile = -
error_logfile = -

expose_config = False

authenticate = False

filter_by_owner = False

owner_mode = user

dag_default_view = tree

dag_orientation = LR

demo_mode = False

log_fetch_timeout_sec = 5

hide_paused_dags_by_default = False

page_size = 100

rbac = False

navbar_color = #007A87

default_dag_run_display_number = 25


[email]
email_backend = airflow.utils.email.send_email_smtp


[smtp]
smtp_host = localhost
smtp_starttls = True
smtp_ssl = False
smtp_port = 25
smtp_mail_from = airflow@example.com


[celery]
celery_app_name = airflow.executors.celery_executor

worker_concurrency = 16

worker_log_server_port = 8793

broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow

result_backend = db+mysql://airflow:airflow@localhost:3306/airflow

# Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start
# it `airflow flower`. This defines the IP that Celery Flower runs on
flower_host = 0.0.0.0

# The root URL for Flower
# Ex: flower_url_prefix = /flower
flower_url_prefix =

# This defines the port that Celery Flower runs on
flower_port = 5555

# Default queue that tasks get assigned to and that worker listen on.
default_queue = default

# Import path for celery configuration options
celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG

# In case of using SSL
ssl_active = False
ssl_key =
ssl_cert =
ssl_cacert =

[celery_broker_transport_options]

[dask]
cluster_address = 127.0.0.1:8786
tls_ca =
tls_cert =
tls_key =


[scheduler]
job_heartbeat_sec = 5

scheduler_heartbeat_sec = 5

run_duration = -1

min_file_process_interval = 0

min_file_parsing_loop_time = 1

dag_dir_list_interval = 300

print_stats_interval = 30

child_process_log_directory = /root/airflow/logs/scheduler

scheduler_zombie_task_threshold = 300

catchup_by_default = True

max_tis_per_query = 512

statsd_on = False
statsd_host = localhost
statsd_port = 8125
statsd_prefix = airflow

max_threads = 2

authenticate = False

[ldap]
uri =
user_filter = objectClass=*
user_name_attr = uid
group_member_attr = memberOf
superuser_filter =
data_profiler_filter =
bind_user = cn=Manager,dc=example,dc=com
bind_password = insecure
basedn = dc=example,dc=com
cacert = /etc/ca/ldap_ca.crt
search_scope = LEVEL

[mesos]
master = localhost:5050

framework_name = Airflow

task_cpu = 1

task_memory = 256

checkpoint = False

authenticate = False


[kerberos]
ccache = /tmp/airflow_krb5_ccache
# gets augmented with fqdn
principal = airflow
reinit_frequency = 3600
kinit_path = kinit
keytab = airflow.keytab


[github_enterprise]
api_rev = v3

[admin]
hide_sensitive_variable_fields = True

[elasticsearch]
elasticsearch_host =
elasticsearch_log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number}
elasticsearch_end_of_log_mark = end_of_log

[kubernetes]
worker_container_repository =
worker_container_tag =

delete_worker_pods = True

namespace = default

airflow_configmap =

dags_volume_subpath =

dags_volume_claim =

logs_volume_subpath =

logs_volume_claim =

git_repo =
git_branch =
git_user =
git_password =
git_subpath =

git_sync_container_repository = gcr.io/google-containers/git-sync-amd64
git_sync_container_tag = v2.0.5
git_sync_init_container_name = git-sync-clone

worker_service_account_name =

image_pull_secrets =

gcp_service_account_keys =

in_cluster = True

[kubernetes_secrets]

1 Answer:

Answer 0 (score: 1):

Make sure the dependencies of cryptography are present on your system:

Debian- or Ubuntu-derived distributions

apt-get install build-essential libssl-dev libffi-dev python-dev

followed by

pip install cryptography

Red Hat-derived distributions

yum install gcc openssl-devel libffi-devel python-devel

followed by

pip install cryptography

I suggest you try this manually first, and then automate it with Ansible.
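
Since you are already driving everything through Ansible, the same dependencies could be ensured in the role before the apache-airflow[crypto] pip task; a minimal sketch for the Red Hat case (package names taken from the yum command above, placement in install.yml assumed):

# Sketch only: ensure cryptography's build dependencies before apache-airflow[crypto]
# (Red Hat-derived hosts; package names from the yum command above)
- name: Airflow | Install | cryptography build dependencies
  yum:
    name:
      - gcc
      - openssl-devel
      - libffi-devel
      - python-devel
    state: present

- name: Airflow | Install | cryptography
  pip:
    name: cryptography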