diff --git a/config/clusters/awi-ciroh/workshop.values.yaml b/config/clusters/awi-ciroh/workshop.values.yaml index 68ea8f2c5c..483fa8a1eb 100644 --- a/config/clusters/awi-ciroh/workshop.values.yaml +++ b/config/clusters/awi-ciroh/workshop.values.yaml @@ -1,20 +1,10 @@ basehub: # Enable node placeholders, to ensure capacity at first login nodePlaceholder: - enabled: true - nodeSelector: - node.kubernetes.io/instance-type: n4-standard-16 - replicas: 25 + enabled: false jupyterhub-home-nfs: - nodeSelector: - cloud.google.com/gke-nodepool: workshop-temp-nfs - tolerations: - - effect: NoSchedule - key: 2i2c.org/nfs-vm-family - operator: Equal - value: n4 gke: - volumeId: projects/ciroh-jupyterhub-423218/zones/us-central1-b/disks/home-nfs-workshop-hyperdisk + volumeId: projects/ciroh-jupyterhub-423218/zones/us-central1-b/disks/hub-nfs-homedirs-workshop quotaEnforcer: config: QuotaManager: @@ -42,7 +32,7 @@ basehub: # Limit total size, to prevent pod being evicted # with over use by any one user. This can be increased, # but may need to increase the size of the node disk - sizeLimit: 20Gi + sizeLimit: 5Gi profileList: # The mem-guarantees are here so k8s doesn't schedule other pods # on these nodes. They need to be just under total allocatable @@ -147,7 +137,7 @@ basehub: cpu_guarantee: 1.8887049999999999 cpu_limit: 15.109639999999999 node_selector: - node.kubernetes.io/instance-type: n4-standard-16 + node.kubernetes.io/instance-type: n2-standard-16 - display_name: Medium default: true description: ~14 GB RAM, ~4 CPUs. Up to ~15 CPUs when available @@ -158,7 +148,7 @@ basehub: cpu_guarantee: 3.7774099999999997 cpu_limit: 15.109639999999999 node_selector: - node.kubernetes.io/instance-type: n4-standard-16 + node.kubernetes.io/instance-type: n2-standard-16 - display_name: Large description: ~30 GB RAM, ~8 CPUs .Up to ~62 CPUs when available profile_options: *profile_options @@ -168,7 +158,7 @@ basehub: cpu_guarantee: 7.554819999999999 cpu_limit: 15.109639999999999 node_selector: - node.kubernetes.io/instance-type: n4-standard-16 + node.kubernetes.io/instance-type: n2-standard-16 - display_name: Huge description: ~59 GB RAM, ~16 CPUs. Up to ~62 CPUs when available profile_options: *profile_options @@ -180,7 +170,7 @@ basehub: cpu_guarantee: 15.109639999999999 cpu_limit: 15.109639999999999 node_selector: - node.kubernetes.io/instance-type: n4-standard-16 + node.kubernetes.io/instance-type: n2-standard-16 - display_name: Small (2i2c testing) profile_options: *profile_options allowed_groups: @@ -191,7 +181,7 @@ basehub: cpu_guarantee: 1.8887049999999999 cpu_limit: 15.109639999999999 node_selector: - node.kubernetes.io/instance-type: n4-standard-4 + node.kubernetes.io/instance-type: n2-standard-4 - display_name: NVIDIA Tesla T4, ~16 GB, ~4 CPUs description: Start a container on a dedicated node with a GPU allowed_groups: diff --git a/terraform/gcp/projects/awi-ciroh.tfvars b/terraform/gcp/projects/awi-ciroh.tfvars index b507bcaa05..a857bab200 100644 --- a/terraform/gcp/projects/awi-ciroh.tfvars +++ b/terraform/gcp/projects/awi-ciroh.tfvars @@ -3,13 +3,8 @@ project_id = "ciroh-jupyterhub-423218" zone = "us-central1-b" region = "us-central1" core_node_machine_type = "n2-highmem-4" -#core_node_machine_type = "n4-highmem-4" -enable_network_policy = true -enable_logging = false - -core_node_boot_disk = { - #type = "hyperdisk-balanced" -} +enable_network_policy = true +enable_logging = false enable_filestore_backups = true filestores = {} @@ -94,73 +89,10 @@ notebook_nodes = { max : 20, machine_type : "n2-standard-64", }, - - # Workshop N4 nodes - # Designed around n4-standard-16 node configuration - # These are optimal for supporting even huge instances whilst - # Having better perf on hyperdisks than 64 - # Model this on Medium profiles - # Assuming 20GiB scratch per user, 240MiB/s, 1250 IOPS - "n4-standard-4" : { - min : 0, - # Keep the numbers down, for safety! - max : 100, - machine_type : "n4-standard-4", - - disk_type : "hyperdisk-balanced", - - # Prefer large disks as cheap and safer - disk_size_gb : 160, - - # Bump these relative to scaling from 64, as the law of large numbers is worse for smaller samples - # And the pathological best-case is objectively worse (one person using IO can only hit disk limit) - # than the n4-standard-64 case - # Minimum 3000 - disk_iops : 3000, - # Limit of n4-standard-4 - disk_throughput : 240 - }, - "n4-standard-16" : { - min : 0, - # Keep the numbers down, for safety! - max : 100, - machine_type : "n4-standard-16", - - disk_type : "hyperdisk-balanced", - - # Allow for 50% oversubscription (small + medium) and 100GiB for images - # i.e. X = (X_user * N_user * 1.5) + 100 - disk_size_gb : 220, - - # Do not compute oversubscription due to small numbers -- - # Mix of smaller-than-medium profiles would disrupt this - disk_iops : 7000, - disk_throughput : 1200 - }, - "n4-standard-64" : { - min : 0, - # Keep the numbers down, for safety! - max : 30, - machine_type : "n4-standard-64", - - disk_type : "hyperdisk-balanced", - disk_size_gb : 580, - - # Limit of n4-standard-64 is 2400 MiB/s - disk_iops : 15000, - disk_throughput : 2400, - }, - "gpu-t4" : { min : 0, max : 20, machine_type : "n1-highmem-8", - - # Use regular storage for scratch - # As this is an n1 node - disk_type : "pd-ssd", - disk_size_gb : 120, - gpu : { enabled : true, type : "nvidia-tesla-t4",