From 8ea187d3414aa869d6b503ab91061bbf292c184d Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Wed, 3 Jul 2024 11:40:15 -0700 Subject: [PATCH 1/3] Get everything to run on Rocky. This would let us use Podman. --- group_vars/nomad_clients.yml | 1 + group_vars/nomad_cluster.yml | 4 +- inventory/all_projects/nomad | 2 + roles/ansible-nomad/defaults/main.yml | 2 + roles/ansible-nomad/tasks/main.yml | 1 - roles/ansible-nomad/vars/RedHat.yml | 2 +- roles/common/tasks/main.yml | 2 +- .../default/group_vars/nomad_clients.yml | 5 +- .../default/group_vars/nomad_cluster.yml | 3 + roles/pul_nomad/molecule/default/molecule.yml | 9 +-- roles/pul_nomad/molecule/default/prepare.yml | 30 +++++---- roles/pul_nomad/molecule/default/verify.yml | 23 ++----- roles/pul_nomad/tasks/main.yml | 62 ++++++++++++++----- 13 files changed, 93 insertions(+), 53 deletions(-) diff --git a/group_vars/nomad_clients.yml b/group_vars/nomad_clients.yml index 6343d6aa0c..a61a140611 100644 --- a/group_vars/nomad_clients.yml +++ b/group_vars/nomad_clients.yml @@ -1,3 +1,4 @@ consul_node_role: "client" nomad_node_role: "client" nomad_docker_enable: true +nomad_podman_enable: true diff --git a/group_vars/nomad_cluster.yml b/group_vars/nomad_cluster.yml index 6f6f1ecba1..a5aca279f7 100644 --- a/group_vars/nomad_cluster.yml +++ b/group_vars/nomad_cluster.yml @@ -19,5 +19,7 @@ nomad_acl_enabled: true pul_nomad_management_token: "{{ vault_pul_nomad_management_token }}" tower_github_token: "{{ vault_tower_github_token }}" ## Don't install common packages, this just runs containers. -common_packages: [jq] +common_packages: [] deploy_user_uid: 1004 +# Use latest nomad podman plugin +nomad_podman_version: '0.5.2' diff --git a/inventory/all_projects/nomad b/inventory/all_projects/nomad index 60c04b843b..3107a05a58 100644 --- a/inventory/all_projects/nomad +++ b/inventory/all_projects/nomad @@ -4,8 +4,10 @@ nomad-host-prod2.lib.princeton.edu nomad-host-prod3.lib.princeton.edu [nomad_clients] nomad-client-prod1.lib.princeton.edu +nomad-client-prod2.lib.princeton.edu dpul-collections-staging1.lib.princeton.edu dpul-collections-staging2.lib.princeton.edu +nomad-client-prod2.lib.princeton.edu [nomad_cluster:children] nomad_servers nomad_clients diff --git a/roles/ansible-nomad/defaults/main.yml b/roles/ansible-nomad/defaults/main.yml index e3a72c116d..5899abcab3 100644 --- a/roles/ansible-nomad/defaults/main.yml +++ b/roles/ansible-nomad/defaults/main.yml @@ -27,6 +27,8 @@ os_supported_matrix: min_version: "20.04" VMware Photon OS: min_version: "4" + Rocky: + min_version: "" ## Core nomad_debug: false diff --git a/roles/ansible-nomad/tasks/main.yml b/roles/ansible-nomad/tasks/main.yml index 42f968fed0..ad07bbbb4a 100644 --- a/roles/ansible-nomad/tasks/main.yml +++ b/roles/ansible-nomad/tasks/main.yml @@ -214,4 +214,3 @@ name: nomad enabled: true state: started - when: not ansible_service_mgr == "systemd" diff --git a/roles/ansible-nomad/vars/RedHat.yml b/roles/ansible-nomad/vars/RedHat.yml index 115f892994..41cf71d417 100644 --- a/roles/ansible-nomad/vars/RedHat.yml +++ b/roles/ansible-nomad/vars/RedHat.yml @@ -2,7 +2,7 @@ # File: vars/RedHat.yml - Red Hat OS variables for Nomad nomad_os_packages: - - "{% if (ansible_distribution == 'AlmaLinux' and ansible_distribution_version is version('9', '>=')) %}curl-minimal{% else %}curl{% endif %}" + - "{% if (ansible_distribution == 'AlmaLinux' and ansible_distribution_version is version('9', '>=')) or (minimal_curl is defined) %}curl-minimal{% else %}curl{% endif %}" - git - "{% if (ansible_distribution == 'Fedora' and ansible_distribution_version is version('28', '<')) or (ansible_distribution == 'CentOS' and ansible_distribution_version is version('8', '<')) or (ansible_distribution == 'Amazon' and ansible_distribution_version is version('3', '<')) or (ansible_distribution == 'OracleLinux' and diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml index 4f1b70c316..3086b898e8 100644 --- a/roles/common/tasks/main.yml +++ b/roles/common/tasks/main.yml @@ -6,7 +6,7 @@ "ansible_hostname not in inventory_hostname" tags: "update_hostname" - name: common | Add apt HTTPS capabilities. - ansible.builtin.apt: + ansible.builtin.package: name: "{{ item }}" state: present loop: diff --git a/roles/pul_nomad/molecule/default/group_vars/nomad_clients.yml b/roles/pul_nomad/molecule/default/group_vars/nomad_clients.yml index 6343d6aa0c..4f141a7085 100644 --- a/roles/pul_nomad/molecule/default/group_vars/nomad_clients.yml +++ b/roles/pul_nomad/molecule/default/group_vars/nomad_clients.yml @@ -1,3 +1,6 @@ consul_node_role: "client" nomad_node_role: "client" -nomad_docker_enable: true +# nomad_docker_enable: true +nomad_podman_enable: true +# Use latest nomad podman plugin +nomad_podman_version: '0.5.2' diff --git a/roles/pul_nomad/molecule/default/group_vars/nomad_cluster.yml b/roles/pul_nomad/molecule/default/group_vars/nomad_cluster.yml index de427c3e5d..0b1993cfac 100644 --- a/roles/pul_nomad/molecule/default/group_vars/nomad_cluster.yml +++ b/roles/pul_nomad/molecule/default/group_vars/nomad_cluster.yml @@ -17,3 +17,6 @@ nomad_bind_address: '0.0.0.0' nomad_consul_token: "904c8247-82a1-429c-9682-30547919fd81" nomad_acl_enabled: true pul_nomad_management_token: "851ff1f8-58d9-4961-b6d1-b57cb972594f" +common_packages: [vim] +deploy_user_uid: 1004 +minimal_curl: true diff --git a/roles/pul_nomad/molecule/default/molecule.yml b/roles/pul_nomad/molecule/default/molecule.yml index 951ca83b23..d4ea0f6d66 100644 --- a/roles/pul_nomad/molecule/default/molecule.yml +++ b/roles/pul_nomad/molecule/default/molecule.yml @@ -22,7 +22,7 @@ lint: | ansible-lint platforms: - name: nomadserver1 - image: "quay.io/pulibrary/jammy-ansible:latest" + image: "eniocarboni/docker-rockylinux-systemd:latest" command: "" volumes: - /sys/fs/cgroup:/sys/fs/cgroup:rw @@ -44,8 +44,9 @@ platforms: published_ports: - 127.0.0.1:8500:8500 # consul port - 127.0.0.1:4646:4646 # Nomad port + # Debian box to ensure it can build a client on either. - name: nomadserver2 - image: "quay.io/pulibrary/jammy-ansible:latest" + image: "eniocarboni/docker-rockylinux-systemd:latest" command: "" volumes: - /sys/fs/cgroup:/sys/fs/cgroup:rw @@ -65,7 +66,7 @@ platforms: networks: - name: "nomad" - name: nomadserver3 - image: "quay.io/pulibrary/jammy-ansible:latest" + image: "eniocarboni/docker-rockylinux-systemd:latest" command: "" volumes: - /sys/fs/cgroup:/sys/fs/cgroup:rw @@ -85,7 +86,7 @@ platforms: networks: - name: "nomad" - name: nomadclient1 - image: "quay.io/pulibrary/jammy-ansible:latest" + image: "eniocarboni/docker-rockylinux-systemd:latest" command: "/sbin/init" volumes: - /sys/fs/cgroup:/sys/fs/cgroup:rw diff --git a/roles/pul_nomad/molecule/default/prepare.yml b/roles/pul_nomad/molecule/default/prepare.yml index 186ad9b553..2815a718ab 100644 --- a/roles/pul_nomad/molecule/default/prepare.yml +++ b/roles/pul_nomad/molecule/default/prepare.yml @@ -2,21 +2,29 @@ - name: prepare hosts: all tasks: - - name: prepare | install iproute - ansible.builtin.apt: - name: "iproute2" + - name: prepare | install debian dependencies + ansible.builtin.package: + name: "{{ item }}" state: present update_cache: true - - name: prepare | install dmidecode - ansible.builtin.apt: - name: "dmidecode" - state: present - update_cache: true - - name: prepare | install dnsutils - ansible.builtin.apt: - name: "dnsutils" + loop: + - "iproute2" + - "dmidecode" + - "dnsutils" + when: + - "ansible_os_family == 'Debian'" + - name: prepare | install RedHat dependencies + ansible.builtin.package: + name: "{{ item }}" state: present update_cache: true + loop: + - "iproute" + - "dmidecode" + - "dnsutils" + - "sudo" + when: + - "ansible_os_family == 'RedHat'" # Disable some docker things to let it work in molecule. - name: Create a directory if it does not exist ansible.builtin.file: diff --git a/roles/pul_nomad/molecule/default/verify.yml b/roles/pul_nomad/molecule/default/verify.yml index 8696f171df..0b42526b19 100644 --- a/roles/pul_nomad/molecule/default/verify.yml +++ b/roles/pul_nomad/molecule/default/verify.yml @@ -34,22 +34,9 @@ assert: that: - "'nomadserver1' in nomad_operator_raft_results.stdout" -- name: Verify Clients - hosts: nomad_clients - gather_facts: false - tasks: - - name: register output of nomad node status - command: nomad node status - environment: - NOMAD_TOKEN: '{{ pul_nomad_management_token }}' - register: nomad_node_status - - name: test nomad client status - assert: - that: - - "'nomadclient1' in nomad_node_status.stdout" - - "'ready' in nomad_node_status.stdout" -- name: Verify Docker Nomad Driver - hosts: nomad_clients +# Podman should be in the RedHat box. +- name: Verify Podman Nomad Driver + hosts: nomadclient1 gather_facts: false tasks: - name: register output of nomad node status @@ -57,7 +44,7 @@ environment: NOMAD_TOKEN: '{{ pul_nomad_management_token }}' register: nomad_node_status - - name: test nomad docker driver + - name: test nomad podman driver assert: that: - - "'docker' in nomad_node_status.stdout" + - "'podman' in nomad_node_status.stdout" diff --git a/roles/pul_nomad/tasks/main.yml b/roles/pul_nomad/tasks/main.yml index 7b66f07807..0164cb8989 100644 --- a/roles/pul_nomad/tasks/main.yml +++ b/roles/pul_nomad/tasks/main.yml @@ -20,7 +20,7 @@ # Get current tokens - name: 'pul_nomad | Register existing tokens' ansible.builtin.shell: - cmd: "consul acl token list -format json" + cmd: "/usr/local/bin/consul acl token list -format json" environment: CONSUL_HTTP_TOKEN: '{{ consul_acl_master_token }}' register: tokens @@ -36,7 +36,7 @@ - name: 'nomad-node | Generate Consul ACL Agent Token' ansible.builtin.shell: - cmd: "consul acl token create -node-identity '{{ inventory_hostname_short }}:dc1' -format json" + cmd: "/usr/local/bin/consul acl token create -node-identity '{{ inventory_hostname_short }}:dc1' -format json" when: - "inventory_hostname_short not in existing_node_names" environment: @@ -46,7 +46,7 @@ - name: 'nomad-node | Set Consul ACL Agent Token' ansible.builtin.shell: - cmd: "consul acl set-agent-token agent {{ created_agent_token.stdout | from_json | community.general.json_query('SecretID')}}" + cmd: "/usr/local/bin/consul acl set-agent-token agent {{ created_agent_token.stdout | from_json | community.general.json_query('SecretID')}}" environment: CONSUL_HTTP_TOKEN: '{{ consul_acl_master_token }}' when: @@ -55,7 +55,7 @@ - name: 'pul_nomad | Create DNS ACL token' ansible.builtin.shell: - cmd: "consul acl token create -templated-policy 'builtin/dns' -secret '{{ consul_dns_token }}'" + cmd: "/usr/local/bin/consul acl token create -templated-policy 'builtin/dns' -secret '{{ consul_dns_token }}'" environment: CONSUL_HTTP_TOKEN: '{{ consul_acl_master_token }}' when: @@ -64,7 +64,7 @@ - name: 'pul_nomad | Assign DNS ACL token' ansible.builtin.shell: - cmd: "consul acl set-agent-token dns '{{ consul_dns_token }}'" + cmd: "/usr/local/bin/consul acl set-agent-token dns '{{ consul_dns_token }}'" environment: CONSUL_HTTP_TOKEN: '{{ consul_acl_master_token }}' @@ -72,32 +72,64 @@ # tasks file for pulibrary-nomad-node - name: Add Docker GPG apt Key - when: "nomad_node_role == 'client'" + when: + - "nomad_node_role == 'client'" + - "ansible_os_family == 'Debian'" apt_key: url: https://download.docker.com/linux/ubuntu/gpg state: present - name: Add Docker Repository - when: "nomad_node_role == 'client'" + when: + - "nomad_node_role == 'client'" + - "ansible_os_family == 'Debian'" apt_repository: repo: deb https://download.docker.com/linux/ubuntu jammy stable state: present - name: Update apt and install docker-ce - when: "nomad_node_role == 'client'" + when: + - "nomad_node_role == 'client'" + - "ansible_os_family == 'Debian'" apt: name: docker-ce state: latest update_cache: true - - name: 'nomad-node | Keep Docker up' - when: "nomad_node_role == 'client'" + when: + - "nomad_node_role == 'client'" + - "ansible_os_family == 'Debian'" ansible.builtin.service: name: "docker" state: "started" enabled: true +# Only install podman on Rocky boxes - it's too out of date for Debian machines. +- name: 'pul_nomad | Install Podman' + ansible.builtin.dnf: + name: podman + state: latest + when: + - "ansible_os_family == 'RedHat'" + +# DMIDecode is needed for Rocky boxes, it makes cpu fingerprinting work. +- name: 'pul_nomad | Install dmidecode' + ansible.builtin.dnf: + name: dmidecode + state: latest + when: + - "ansible_os_family == 'RedHat'" + +- name: 'nomad-node | Keep Podman up' + when: + - "nomad_node_role == 'client'" + - "ansible_os_family == 'RedHat'" + ansible.builtin.service: + name: "podman.socket" + state: "started" + enabled: true + - name: 'pul_nomad | Create Nomad Policy to reference' ansible.builtin.copy: src: pul_nomad_policy.hcl @@ -107,7 +139,7 @@ - name: 'pul_nomad | Register existing policies' ansible.builtin.shell: - cmd: "consul acl policy list -format json" + cmd: "/usr/local/bin/consul acl policy list -format json" environment: CONSUL_HTTP_TOKEN: '{{ consul_acl_master_token }}' register: policies @@ -119,7 +151,7 @@ - name: 'pul_nomad | Install Nomad Consul Policy' ansible.builtin.shell: - cmd: "consul acl policy create -name 'nomad-agents' -description 'Policy for Nomad agents' -rules '@/etc/consul/pul_nomad_policy.hcl'" + cmd: "/usr/local/bin/consul acl policy create -name 'nomad-agents' -description 'Policy for Nomad agents' -rules '@/etc/consul/pul_nomad_policy.hcl'" environment: CONSUL_HTTP_TOKEN: '{{ consul_acl_master_token }}' when: @@ -128,7 +160,7 @@ - name: 'pul_nomad | Create nomad ACL token' ansible.builtin.shell: - cmd: "consul acl token create -policy-name 'nomad-agents' -secret '{{ nomad_consul_token }}'" + cmd: "/usr/local/bin/consul acl token create -policy-name 'nomad-agents' -secret '{{ nomad_consul_token }}'" environment: CONSUL_HTTP_TOKEN: '{{ consul_acl_master_token }}' when: @@ -142,7 +174,7 @@ - name: 'pul_nomad | Register existing nomad acl tokens' ansible.builtin.shell: - cmd: "nomad acl token list || true" + cmd: "/usr/local/bin/nomad acl token list || true" environment: NOMAD_TOKEN: '{{ pul_nomad_management_token }}' register: nomad_acl_tokens @@ -152,7 +184,7 @@ - name: 'pul_nomad | Bootstrap nomad' ansible.builtin.command: - cmd: nomad acl bootstrap - + cmd: /usr/local/bin/nomad acl bootstrap - stdin: '{{ pul_nomad_management_token }}' when: - "unique_command_runner == inventory_hostname" From d0208676f0fefdc355936c80acadbfedb60890b0 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Mon, 22 Jul 2024 13:50:53 -0700 Subject: [PATCH 2/3] Servers are debian, clients are Rocky. --- roles/pul_nomad/molecule/default/molecule.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/roles/pul_nomad/molecule/default/molecule.yml b/roles/pul_nomad/molecule/default/molecule.yml index d4ea0f6d66..a0428eae3d 100644 --- a/roles/pul_nomad/molecule/default/molecule.yml +++ b/roles/pul_nomad/molecule/default/molecule.yml @@ -22,7 +22,7 @@ lint: | ansible-lint platforms: - name: nomadserver1 - image: "eniocarboni/docker-rockylinux-systemd:latest" + image: "quay.io/pulibrary/jammy-ansible:latest" command: "" volumes: - /sys/fs/cgroup:/sys/fs/cgroup:rw @@ -46,7 +46,7 @@ platforms: - 127.0.0.1:4646:4646 # Nomad port # Debian box to ensure it can build a client on either. - name: nomadserver2 - image: "eniocarboni/docker-rockylinux-systemd:latest" + image: "quay.io/pulibrary/jammy-ansible:latest" command: "" volumes: - /sys/fs/cgroup:/sys/fs/cgroup:rw @@ -66,7 +66,7 @@ platforms: networks: - name: "nomad" - name: nomadserver3 - image: "eniocarboni/docker-rockylinux-systemd:latest" + image: "quay.io/pulibrary/jammy-ansible:latest" command: "" volumes: - /sys/fs/cgroup:/sys/fs/cgroup:rw From 1373d8f76f424aad02a7575e29f6e5c364d811b9 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Tue, 13 Aug 2024 09:01:06 -0700 Subject: [PATCH 3/3] Add nomad autopilot. This deletes old servers from the cluster automatically. --- group_vars/nomad_cluster.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/group_vars/nomad_cluster.yml b/group_vars/nomad_cluster.yml index a5aca279f7..92867a18f0 100644 --- a/group_vars/nomad_cluster.yml +++ b/group_vars/nomad_cluster.yml @@ -16,6 +16,8 @@ nomad_group_name: 'nomad_cluster' nomad_bind_address: '0.0.0.0' nomad_consul_token: "{{ vault_nomad_consul_token }}" nomad_acl_enabled: true +nomad_autopilot: true +nomad_autopilot_cleanup_dead_servers: true pul_nomad_management_token: "{{ vault_pul_nomad_management_token }}" tower_github_token: "{{ vault_tower_github_token }}" ## Don't install common packages, this just runs containers.