Use TLS Zookeeper connections

This creates 2 TLS secrets: one for internal use among Zookeeper quorum
members, and a second for use by Zuul and Nodepool as clients.

It uses pre-generated output from the helm charts (with a locally applied
patch to add TLS support while we wait for an upstream PR which adds it
to merge).  These switch the existing cluster to use server-side and
client-side TLS.

Zuul and Nodepool configurations are also updated to use the new client
certs.

Change-Id: I6951c39501c61291d2f7eec43426251c603bc537
diff --git a/k8s/nodepool.yaml b/k8s/nodepool.yaml
index f3b2eab..eb5a7fe 100644
--- a/k8s/nodepool.yaml
+++ b/k8s/nodepool.yaml
@@ -34,8 +34,14 @@
         volumeMounts:
         - name: nodepool-config
           mountPath: /etc/nodepool
+        - name: zookeeper-client-tls
+          mountPath: /tls/client
+          readOnly: true
       volumes:
       - name: nodepool-config
         secret:
           secretName: nodepool-gcs
+      - name: zookeeper-client-tls
+        secret:
+          secretName: zookeeper-client-tls
       serviceAccountName: nodepool
diff --git a/k8s/zookeeper/README b/k8s/zookeeper/README
new file mode 100644
index 0000000..c0226d4
--- /dev/null
+++ b/k8s/zookeeper/README
@@ -0,0 +1,11 @@
+This directory contains deployment code for Zookeeper.
+
+The file `certs.yaml` is written by hand; it defines the namespace and
+the cert-manager Certificate that creates the Zookeeper server TLS cert.
+
+The file `zookeeper.yaml` is the output of the Zookeeper Helm chart
+with https://github.com/helm/charts/pull/23480 applied and
+`values.yaml` as input.
+
+Once that PR is merged, we can switch to using the plain upstream Helm
+chart.
diff --git a/k8s/zookeeper/certs.yaml b/k8s/zookeeper/certs.yaml
new file mode 100644
index 0000000..b47fbf1
--- /dev/null
+++ b/k8s/zookeeper/certs.yaml
@@ -0,0 +1,25 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: zookeeper
+---
+apiVersion: cert-manager.io/v1alpha2
+kind: Certificate
+metadata:
+  name: zookeeper-server
+  namespace: zookeeper
+spec:
+  keyEncoding: pkcs8  # NOTE(review): presumably needed because the run script feeds tls.key to ZooKeeper as a PEM key store — confirm
+  secretName: zookeeper-server-tls  # referenced by both serverTlsSecret and clientTlsSecret in values.yaml
+  commonName: server
+  dnsNames:  # one FQDN and one short hostname per StatefulSet pod (replicas: 3)
+  - zookeeper-0.zookeeper-headless.zookeeper.svc.cluster.local
+  - zookeeper-0
+  - zookeeper-1.zookeeper-headless.zookeeper.svc.cluster.local
+  - zookeeper-1
+  - zookeeper-2.zookeeper-headless.zookeeper.svc.cluster.local
+  - zookeeper-2
+  issuerRef:
+    name: ca-issuer
+    kind: ClusterIssuer
diff --git a/k8s/zookeeper/values.yaml b/k8s/zookeeper/values.yaml
new file mode 100644
index 0000000..336782b
--- /dev/null
+++ b/k8s/zookeeper/values.yaml
@@ -0,0 +1,313 @@
+## As weighted quorums are not supported, it is imperative that an odd number of replicas
+## be chosen. Moreover, the number of replicas should be either 1, 3, 5, or 7.
+##
+## ref: https://github.com/kubernetes/contrib/tree/master/statefulsets/zookeeper#stateful-set
+replicaCount: 3  # Desired quantity of ZooKeeper pods. This should always be (1,3,5, or 7)
+
+podDisruptionBudget:
+  maxUnavailable: 1  # Limits how many Zookeeper pods may be unavailable due to voluntary disruptions.
+
+terminationGracePeriodSeconds: 1800  # Duration in seconds a Zookeeper pod needs to terminate gracefully.
+
+updateStrategy:
+  type: RollingUpdate
+
+## refs:
+## - https://github.com/kubernetes/contrib/tree/master/statefulsets/zookeeper
+## - https://github.com/kubernetes/contrib/blob/master/statefulsets/zookeeper/Makefile#L1
+image:
+  repository: zookeeper     # Container image repository for zookeeper container.
+  tag: 3.5.5                # Container image tag for zookeeper container.
+  pullPolicy: IfNotPresent  # Image pull criteria for zookeeper container.
+
+service:
+  type: ClusterIP  # Exposes zookeeper on a cluster-internal IP.
+  annotations: {}  # Arbitrary non-identifying metadata for zookeeper service.
+    ## AWS example for use with LoadBalancer service type.
+    # external-dns.alpha.kubernetes.io/hostname: zookeeper.cluster.local
+    # service.beta.kubernetes.io/aws-load-balancer-cross-zone-load-balancing-enabled: "true"
+    # service.beta.kubernetes.io/aws-load-balancer-internal: "true"
+  ports:
+    client:
+      port: 2281  # Service port number for client port.
+      targetPort: client  # Service target port for client port.
+      protocol: TCP  # Service port protocol for client port.
+
+## Headless service.
+##
+headless:
+  annotations: {}
+  # publishNotReadyAddresses, default false for backward compatibility
+  # set to true to register DNS entries for unready pods, which helps in rare
+  # occasions when cluster is unable to be created, DNS caching is enforced
+  # or pods are in persistent crash loop
+  publishNotReadyAddresses: true
+
+ports:
+  client:
+    containerPort: 2281  # Port number for zookeeper container client port.
+    protocol: TCP  # Protocol for zookeeper container client port.
+  election:
+    containerPort: 3888  # Port number for zookeeper container election port.
+    protocol: TCP  # Protocol for zookeeper container election port.
+  server:
+    containerPort: 2888  # Port number for zookeeper container server port.
+    protocol: TCP  # Protocol for zookeeper container server port.
+
+resources: {}  # Optionally specify how much CPU and memory (RAM) each zookeeper container needs.
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #  cpu: 100m
+  #  memory: 128Mi
+  # requests:
+  #  cpu: 100m
+  #  memory: 128Mi
+
+priorityClassName: ""
+
+nodeSelector: {}  # Node label-values required to run zookeeper pods.
+
+tolerations: []  # Node taint overrides for zookeeper pods.
+
+affinity: {}  # Criteria by which pod label-values influence scheduling for zookeeper pods.
+  # podAntiAffinity:
+  #   requiredDuringSchedulingIgnoredDuringExecution:
+  #     - topologyKey: "kubernetes.io/hostname"
+  #       labelSelector:
+  #         matchLabels:
+  #           release: zookeeper
+
+podAnnotations: {}  # Arbitrary non-identifying metadata for zookeeper pods.
+  # prometheus.io/scrape: "true"
+  # prometheus.io/path: "/metrics"
+  # prometheus.io/port: "9141"
+
+podLabels: {}  # Key/value pairs that are attached to zookeeper pods.
+  # team: "developers"
+  # service: "zookeeper"
+
+securityContext:
+  fsGroup: 1000
+  runAsUser: 1000
+
+## Useful, if you want to use an alternate image.
+command:
+  - /bin/bash
+  - -xec
+  - /config-scripts/run
+
+## If you want the quorum to use SSL internally, create a
+## kubernetes.io/tls object (cert-manager produces these), and ensure
+## that it has a SAN entry for each of the server hostnames (e.g.:
+## zookeeper-0.zookeeper-headless.namespace.svc.cluster.local).
+## Supply the name of the object here:
+serverTlsSecret: "zookeeper-server-tls"
+
+## To use client SSL, create a kubernetes.io/tls object (cert-manager
+## produces these), and supply the name here.  If this is provided,
+## then plain-text client connections are disabled.  The same secret
+## may be used for both client and server connections.
+clientTlsSecret: "zookeeper-server-tls"
+
+## Useful if using any custom authorizer.
+## Pass any secrets to the zookeeper pods. Each secret will be passed as an
+## environment variable by default. The secret can also be mounted to a
+## specific path (in addition to environment variable) if required. Environment
+## variable names are generated as: `<secretName>_<secretKey>` (All upper case)
+# secrets:
+# - name: myKafkaSecret
+#   keys:
+#     - username
+#     - password
+#   # mountPath: /opt/kafka/secret
+# - name: myZkSecret
+#   keys:
+#     - user
+#     - pass
+#   mountPath: /opt/zookeeper/secret
+
+persistence:
+  enabled: true
+  ## zookeeper data Persistent Volume Storage Class
+  ## If defined, storageClassName: <storageClass>
+  ## If set to "-", storageClassName: "", which disables dynamic provisioning
+  ## If undefined (the default) or set to null, no storageClassName spec is
+  ##   set, choosing the default provisioner.  (gp2 on AWS, standard on
+  ##   GKE, AWS & OpenStack)
+  ##
+  # storageClass: "-"
+  accessMode: ReadWriteOnce
+  size: 5Gi
+
+## Exporters query apps for metrics and make those metrics available for
+## Prometheus to scrape.
+exporters:
+
+  jmx:
+    enabled: false
+    image:
+      repository: sscaling/jmx-prometheus-exporter
+      tag: 0.3.0
+      pullPolicy: IfNotPresent
+    config:
+      lowercaseOutputName: false
+      ## ref: https://github.com/prometheus/jmx_exporter/blob/master/example_configs/zookeeper.yaml
+      rules:
+        - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+)><>(\\w+)"
+          name: "zookeeper_$2"
+        - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+)><>(\\w+)"
+          name: "zookeeper_$3"
+          labels:
+            replicaId: "$2"
+        - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+)><>(\\w+)"
+          name: "zookeeper_$4"
+          labels:
+            replicaId: "$2"
+            memberType: "$3"
+        - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+), name3=(\\w+)><>(\\w+)"
+          name: "zookeeper_$4_$5"
+          labels:
+            replicaId: "$2"
+            memberType: "$3"
+      startDelaySeconds: 30
+    env: {}
+    resources: {}
+    path: /metrics
+    ports:
+      jmxxp:
+        containerPort: 9404
+        protocol: TCP
+    livenessProbe:
+      httpGet:
+        path: /metrics
+        port: jmxxp
+      initialDelaySeconds: 30
+      periodSeconds: 15
+      timeoutSeconds: 60
+      failureThreshold: 8
+      successThreshold: 1
+    readinessProbe:
+      httpGet:
+        path: /metrics
+        port: jmxxp
+      initialDelaySeconds: 30
+      periodSeconds: 15
+      timeoutSeconds: 60
+      failureThreshold: 8
+      successThreshold: 1
+    serviceMonitor:
+      interval: 30s
+      scrapeTimeout: 30s
+      scheme: http
+
+  zookeeper:
+  ## refs:
+  ## - https://github.com/carlpett/zookeeper_exporter
+  ## - https://hub.docker.com/r/josdotso/zookeeper-exporter/
+  ## - https://www.datadoghq.com/blog/monitoring-kafka-performance-metrics/#zookeeper-metrics
+    enabled: false
+    image:
+      repository: josdotso/zookeeper-exporter
+      tag: v1.1.2
+      pullPolicy: IfNotPresent
+    config:
+      logLevel: info
+      resetOnScrape: "true"
+    env: {}
+    resources: {}
+    path: /metrics
+    ports:
+      zookeeperxp:
+        containerPort: 9141
+        protocol: TCP
+    livenessProbe:
+      httpGet:
+        path: /metrics
+        port: zookeeperxp
+      initialDelaySeconds: 30
+      periodSeconds: 15
+      timeoutSeconds: 60
+      failureThreshold: 8
+      successThreshold: 1
+    readinessProbe:
+      httpGet:
+        path: /metrics
+        port: zookeeperxp
+      initialDelaySeconds: 30
+      periodSeconds: 15
+      timeoutSeconds: 60
+      failureThreshold: 8
+      successThreshold: 1
+    serviceMonitor:
+      interval: 30s
+      scrapeTimeout: 30s
+      scheme: http
+
+## ServiceMonitor configuration in case you are using Prometheus Operator
+prometheus:
+  serviceMonitor:
+    ## If true a ServiceMonitor for each enabled exporter will be installed
+    enabled: false
+    ## The namespace where the ServiceMonitor(s) will be installed
+    # namespace: monitoring
+    ## The selector the Prometheus instance is searching for
+    ## [Default Prometheus Operator selector] (https://github.com/helm/charts/blob/f5a751f174263971fafd21eee4e35416d6612a3d/stable/prometheus-operator/templates/prometheus/prometheus.yaml#L74)
+    selector: {}
+
+## Use an alternate scheduler, e.g. "stork".
+## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/
+##
+# schedulerName:
+
+## ref: https://github.com/kubernetes/contrib/tree/master/statefulsets/zookeeper
+env:
+
+  ## Options related to JMX exporter.
+  ## ref: https://github.com/apache/zookeeper/blob/master/bin/zkServer.sh#L36
+  JMXAUTH: "false"
+  JMXDISABLE: "false"  # NOTE(review): the run script in zookeeper.yaml tests this with [ -n ... ], so any non-empty value (including "false") selects the branch without JMX flags
+  JMXPORT: 1099
+  JMXSSL: "false"
+
+  ## The port on which the server will accept client requests.
+  ZOO_PORT: 2181  # NOTE(review): not read by the run script in zookeeper.yaml; when the client TLS secret is mounted it writes secureClientPort (default 2281) instead of clientPort
+
+  ## The number of ticks that an ensemble member is allowed to perform leader
+  ## election.
+  ZOO_INIT_LIMIT: 5  # NOTE(review): the run script reads ZK_INIT_LIMIT (default 10), not this name
+
+  ZOO_TICK_TIME: 2000  # NOTE(review): the run script reads ZK_TICK_TIME (set below), not this name
+
+  ## The maximum number of concurrent client connections that
+  ## a server in the ensemble will accept.
+  ZOO_MAX_CLIENT_CNXNS: 60  # NOTE(review): the run script reads ZK_MAX_CLIENT_CNXNS (default 60), not this name
+
+  ## The number of ticks by which a follower may lag behind the ensemble's leader.
+  ZK_SYNC_LIMIT: 10
+
+  ## The number of wall clock ms that corresponds to a tick for the ensemble's
+  ## internal time.
+  ZK_TICK_TIME: 2000
+
+  ZOO_AUTOPURGE_PURGEINTERVAL: 0  # NOTE(review): the run script reads ZK_PURGE_INTERVAL (default 0), not this name
+  ZOO_AUTOPURGE_SNAPRETAINCOUNT: 3  # NOTE(review): the run script reads ZK_SNAP_RETAIN_COUNT (default 3), not this name
+  ZOO_STANDALONE_ENABLED: false  # NOTE(review): not referenced by the run script in zookeeper.yaml
+
+jobs:
+  ## ref: http://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkSessions
+  chroots:
+    enabled: false
+    activeDeadlineSeconds: 300
+    backoffLimit: 5
+    completions: 1
+    config:
+      create: []
+        # - /kafka
+        # - /ureplicator
+    env: []
+    parallelism: 1
+    resources: {}
+    restartPolicy: Never
diff --git a/k8s/zookeeper/zookeeper.yaml b/k8s/zookeeper/zookeeper.yaml
new file mode 100644
index 0000000..b61fce6
--- /dev/null
+++ b/k8s/zookeeper/zookeeper.yaml
@@ -0,0 +1,349 @@
+---
+# Source: zookeeper/templates/poddisruptionbudget.yaml
+apiVersion: policy/v1beta1
+kind: PodDisruptionBudget
+metadata:
+  name: zookeeper
+  labels:
+    app: zookeeper
+    chart: zookeeper-2.1.5
+    release: zookeeper
+    heritage: Helm
+    component: server
+spec:
+  selector:  # same three labels as the StatefulSet pod template
+    matchLabels:
+      app: zookeeper
+      release: zookeeper
+      component: server
+  maxUnavailable: 1  # rendered from podDisruptionBudget.maxUnavailable in values.yaml
+---
+# Source: zookeeper/templates/config-script.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: zookeeper
+  labels:
+    app: zookeeper
+    chart: zookeeper-2.1.5
+    release: zookeeper
+    heritage: Helm
+    component: server
+data:
+    ok: |
+      #!/bin/sh
+      if [ -f /tls/client/ca.crt ]; then  # client TLS secret is mounted: send "srvr" over TLS; port is $1, default 2281
+        echo "srvr" | openssl s_client -CAfile /tls/client/ca.crt -cert /tls/client/tls.crt -key /tls/client/tls.key -connect 127.0.0.1:${1:-2281} -quiet -ign_eof 2>/dev/null | grep Mode  # exit status is grep's: succeeds only if the reply has a "Mode" line
+      else  # no client TLS material mounted: fall back to zkServer.sh
+        zkServer.sh status
+      fi
+
+    ready: |
+      #!/bin/sh
+      if [ -f /tls/client/ca.crt ]; then  # client TLS secret is mounted: send "ruok" over TLS; port is $1, default 2281
+        echo "ruok" | openssl s_client -CAfile /tls/client/ca.crt -cert /tls/client/tls.crt -key /tls/client/tls.key -connect 127.0.0.1:${1:-2281} -quiet -ign_eof 2>/dev/null | grep imok  # ready only when the server actually answers "imok"
+      else  # plain-text fallback; port is $1, default 2181
+        echo ruok | nc 127.0.0.1 ${1:-2181} | grep imok  # ready only when the server actually answers "imok"
+      fi
+
+    run: |
+      #!/bin/bash
+
+      set -a  # mark every variable assigned below for export
+      ROOT=$(echo /apache-zookeeper-*)  # resolve the versioned install directory via glob
+
+      ZK_USER=${ZK_USER:-"zookeeper"}
+      ZK_LOG_LEVEL=${ZK_LOG_LEVEL:-"INFO"}
+      ZK_DATA_DIR=${ZK_DATA_DIR:-"/data"}
+      ZK_DATA_LOG_DIR=${ZK_DATA_LOG_DIR:-"/data/log"}
+      ZK_CONF_DIR=${ZK_CONF_DIR:-"/conf"}
+      ZK_CLIENT_PORT=${ZK_CLIENT_PORT:-2181}
+      ZK_SSL_CLIENT_PORT=${ZK_SSL_CLIENT_PORT:-2281}
+      ZK_SERVER_PORT=${ZK_SERVER_PORT:-2888}
+      ZK_ELECTION_PORT=${ZK_ELECTION_PORT:-3888}
+      ZK_TICK_TIME=${ZK_TICK_TIME:-2000}
+      ZK_INIT_LIMIT=${ZK_INIT_LIMIT:-10}
+      ZK_SYNC_LIMIT=${ZK_SYNC_LIMIT:-5}
+      ZK_HEAP_SIZE=${ZK_HEAP_SIZE:-2G}
+      ZK_MAX_CLIENT_CNXNS=${ZK_MAX_CLIENT_CNXNS:-60}
+      ZK_MIN_SESSION_TIMEOUT=${ZK_MIN_SESSION_TIMEOUT:- $((ZK_TICK_TIME*2))}
+      ZK_MAX_SESSION_TIMEOUT=${ZK_MAX_SESSION_TIMEOUT:- $((ZK_TICK_TIME*20))}
+      ZK_SNAP_RETAIN_COUNT=${ZK_SNAP_RETAIN_COUNT:-3}
+      ZK_PURGE_INTERVAL=${ZK_PURGE_INTERVAL:-0}
+      ID_FILE="$ZK_DATA_DIR/myid"
+      ZK_CONFIG_FILE="$ZK_CONF_DIR/zoo.cfg"
+      LOG4J_PROPERTIES="$ZK_CONF_DIR/log4j.properties"
+      HOST=$(hostname)
+      DOMAIN=`hostname -d`
+      JVMFLAGS="-Xmx$ZK_HEAP_SIZE -Xms$ZK_HEAP_SIZE"
+
+      APPJAR=$(echo $ROOT/*jar)
+      CLASSPATH="${ROOT}/lib/*:${APPJAR}:${ZK_CONF_DIR}:"
+
+      if [[ $HOST =~ (.*)-([0-9]+)$ ]]; then  # split "<name>-<ordinal>" out of the pod hostname
+          NAME=${BASH_REMATCH[1]}
+          ORD=${BASH_REMATCH[2]}
+          MY_ID=$((ORD+1))  # server ids are 1-based, pod ordinals are 0-based
+      else
+          echo "Failed to extract ordinal from hostname $HOST"
+          exit 1
+      fi
+
+      mkdir -p $ZK_DATA_DIR
+      mkdir -p $ZK_DATA_LOG_DIR
+      echo $MY_ID > $ID_FILE  # overwrite, not append: /data is the persistent volume, so ">>" added one more line on every restart
+
+      if [[ -f /tls/server/ca.crt ]]; then  # quorum TLS: build trust store and cert+key key store as PEM files
+        cp /tls/server/ca.crt /data/server-ca.pem
+        cat /tls/server/tls.crt /tls/server/tls.key > /data/server.pem
+      fi
+      if [[ -f /tls/client/ca.crt ]]; then  # client TLS: same layout, separate files
+        cp /tls/client/ca.crt /data/client-ca.pem
+        cat /tls/client/tls.crt /tls/client/tls.key > /data/client.pem
+      fi
+
+      echo "dataDir=$ZK_DATA_DIR" >> $ZK_CONFIG_FILE
+      echo "dataLogDir=$ZK_DATA_LOG_DIR" >> $ZK_CONFIG_FILE
+      echo "tickTime=$ZK_TICK_TIME" >> $ZK_CONFIG_FILE
+      echo "initLimit=$ZK_INIT_LIMIT" >> $ZK_CONFIG_FILE
+      echo "syncLimit=$ZK_SYNC_LIMIT" >> $ZK_CONFIG_FILE
+      echo "maxClientCnxns=$ZK_MAX_CLIENT_CNXNS" >> $ZK_CONFIG_FILE
+      echo "minSessionTimeout=$ZK_MIN_SESSION_TIMEOUT" >> $ZK_CONFIG_FILE
+      echo "maxSessionTimeout=$ZK_MAX_SESSION_TIMEOUT" >> $ZK_CONFIG_FILE
+      echo "autopurge.snapRetainCount=$ZK_SNAP_RETAIN_COUNT" >> $ZK_CONFIG_FILE
+      echo "autopurge.purgeInterval=$ZK_PURGE_INTERVAL" >> $ZK_CONFIG_FILE
+      echo "4lw.commands.whitelist=*" >> $ZK_CONFIG_FILE  # the ok/ready probe scripts send "srvr" and "ruok"
+
+      # Client TLS configuration
+      if [[ -f /tls/client/ca.crt ]]; then  # only secureClientPort is written here, no clientPort
+        echo "secureClientPort=$ZK_SSL_CLIENT_PORT" >> $ZK_CONFIG_FILE
+        echo "ssl.keyStore.location=/data/client.pem" >> $ZK_CONFIG_FILE
+        echo "ssl.trustStore.location=/data/client-ca.pem" >> $ZK_CONFIG_FILE
+      else
+        echo "clientPort=$ZK_CLIENT_PORT" >> $ZK_CONFIG_FILE
+      fi
+
+      # Server TLS configuration
+      if [[ -f /tls/server/ca.crt ]]; then
+        echo "serverCnxnFactory=org.apache.zookeeper.server.NettyServerCnxnFactory" >> $ZK_CONFIG_FILE
+        echo "sslQuorum=true" >> $ZK_CONFIG_FILE
+        echo "ssl.quorum.keyStore.location=/data/server.pem" >> $ZK_CONFIG_FILE
+        echo "ssl.quorum.trustStore.location=/data/server-ca.pem" >> $ZK_CONFIG_FILE
+      fi
+
+      for (( i=1; i<=$ZK_REPLICAS; i++ ))  # one server.N entry per replica; ZK_REPLICAS comes from the StatefulSet env
+      do
+          echo "server.$i=$NAME-$((i-1)).$DOMAIN:$ZK_SERVER_PORT:$ZK_ELECTION_PORT" >> $ZK_CONFIG_FILE
+      done
+
+      rm -f $LOG4J_PROPERTIES  # regenerate the log4j config from scratch on every start
+
+      echo "zookeeper.root.logger=$ZK_LOG_LEVEL, CONSOLE" >> $LOG4J_PROPERTIES
+      echo "zookeeper.console.threshold=$ZK_LOG_LEVEL" >> $LOG4J_PROPERTIES
+      echo "zookeeper.log.threshold=$ZK_LOG_LEVEL" >> $LOG4J_PROPERTIES
+      echo "zookeeper.log.dir=$ZK_DATA_LOG_DIR" >> $LOG4J_PROPERTIES
+      echo "zookeeper.log.file=zookeeper.log" >> $LOG4J_PROPERTIES
+      echo "zookeeper.log.maxfilesize=256MB" >> $LOG4J_PROPERTIES
+      echo "zookeeper.log.maxbackupindex=10" >> $LOG4J_PROPERTIES
+      echo "zookeeper.tracelog.dir=$ZK_DATA_LOG_DIR" >> $LOG4J_PROPERTIES
+      echo "zookeeper.tracelog.file=zookeeper_trace.log" >> $LOG4J_PROPERTIES
+      echo "log4j.rootLogger=\${zookeeper.root.logger}" >> $LOG4J_PROPERTIES
+      echo "log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender" >> $LOG4J_PROPERTIES
+      echo "log4j.appender.CONSOLE.Threshold=\${zookeeper.console.threshold}" >> $LOG4J_PROPERTIES
+      echo "log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout" >> $LOG4J_PROPERTIES
+      echo "log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n" >> $LOG4J_PROPERTIES
+
+      if [ -n "$JMXDISABLE" ]  # NOTE(review): true for any non-empty value, including the "false" set in the StatefulSet env
+      then
+          MAIN=org.apache.zookeeper.server.quorum.QuorumPeerMain
+      else
+          MAIN="-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=$JMXPORT -Dcom.sun.management.jmxremote.authenticate=$JMXAUTH -Dcom.sun.management.jmxremote.ssl=$JMXSSL -Dzookeeper.jmx.log4j.disable=$JMXLOG4J org.apache.zookeeper.server.quorum.QuorumPeerMain"  # NOTE(review): JMXLOG4J is not assigned in this script or in the StatefulSet env
+      fi
+
+      set -x
+      exec java -cp "$CLASSPATH" $JVMFLAGS $MAIN $ZK_CONFIG_FILE
+---
+# Source: zookeeper/templates/service-headless.yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: zookeeper-headless  # referenced as serviceName by the StatefulSet
+  labels:
+    app: zookeeper
+    chart: zookeeper-2.1.5
+    release: zookeeper
+    heritage: Helm
+spec:
+  clusterIP: None
+  publishNotReadyAddresses: true  # from headless.publishNotReadyAddresses in values.yaml: register DNS entries for unready pods too
+  ports:
+    - name: client
+      port: 2281
+      targetPort: client
+      protocol: TCP
+    - name: election
+      port: 3888
+      targetPort: election
+      protocol: TCP
+    - name: server
+      port: 2888
+      targetPort: server
+      protocol: TCP
+  selector:
+    app: zookeeper
+    release: zookeeper
+---
+# Source: zookeeper/templates/service.yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: zookeeper
+  labels:
+    app: zookeeper
+    chart: zookeeper-2.1.5
+    release: zookeeper
+    heritage: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - name: client
+      port: 2281  # the port zuul.conf and nodepool.yaml now connect to (zookeeper.zookeeper:2281)
+      protocol: TCP
+      targetPort: client  # the container port named "client" (2281) in the StatefulSet
+  selector:
+    app: zookeeper
+    release: zookeeper
+---
+# Source: zookeeper/templates/statefulset.yaml
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: zookeeper
+  labels:
+    app: zookeeper
+    chart: zookeeper-2.1.5
+    release: zookeeper
+    heritage: Helm
+    component: server
+spec:
+  serviceName: zookeeper-headless
+  replicas: 3  # mirrored by ZK_REPLICAS below, which the run script uses to emit the server.N entries
+  selector:
+    matchLabels:
+      app: zookeeper
+      release: zookeeper
+      component: server
+  updateStrategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        app: zookeeper
+        release: zookeeper
+        component: server
+    spec:
+      terminationGracePeriodSeconds: 1800
+      securityContext:
+        fsGroup: 1000
+        runAsUser: 1000
+      containers:
+
+        - name: zookeeper
+          image: "zookeeper:3.5.5"
+          imagePullPolicy: IfNotPresent
+          command: 
+             - "/bin/bash"
+             - "-xec"
+             - "/config-scripts/run"
+          ports:
+            - name: client
+              containerPort: 2281  # same value as the run script's ZK_SSL_CLIENT_PORT default
+              protocol: TCP
+            - name: election
+              containerPort: 3888
+              protocol: TCP
+            - name: server
+              containerPort: 2888
+              protocol: TCP
+          livenessProbe:
+            exec:
+              command:
+                - sh
+                - /config-scripts/ok  # no port argument, so the script uses its default (2281 when client TLS is mounted)
+            initialDelaySeconds: 20
+            periodSeconds: 30
+            timeoutSeconds: 5
+            failureThreshold: 2
+            successThreshold: 1
+          readinessProbe:
+            exec:
+              command:
+                - sh
+                - /config-scripts/ready  # no port argument, so the script uses its default (2281 when client TLS is mounted)
+            initialDelaySeconds: 20
+            periodSeconds: 30
+            timeoutSeconds: 5
+            failureThreshold: 2
+            successThreshold: 1
+          env:
+            - name: ZK_REPLICAS
+              value: "3"
+            - name: JMXAUTH
+              value: "false"
+            - name: JMXDISABLE
+              value: "false"  # NOTE(review): the run script tests [ -n "$JMXDISABLE" ], so this non-empty value selects the branch without JMX flags
+            - name: JMXPORT
+              value: "1099"
+            - name: JMXSSL
+              value: "false"
+            - name: ZK_SYNC_LIMIT
+              value: "10"
+            - name: ZK_TICK_TIME
+              value: "2000"
+            - name: ZOO_AUTOPURGE_PURGEINTERVAL  # NOTE(review): this and the other ZOO_* variables below are not read by /config-scripts/run, which uses ZK_* names
+              value: "0"
+            - name: ZOO_AUTOPURGE_SNAPRETAINCOUNT
+              value: "3"
+            - name: ZOO_INIT_LIMIT
+              value: "5"
+            - name: ZOO_MAX_CLIENT_CNXNS
+              value: "60"
+            - name: ZOO_PORT
+              value: "2181"
+            - name: ZOO_STANDALONE_ENABLED
+              value: "false"
+            - name: ZOO_TICK_TIME
+              value: "2000"
+          resources:
+            {}
+          volumeMounts:
+            - name: data
+              mountPath: /data
+            - name: zookeeper-server-tls
+              mountPath: /tls/server  # the run script enables quorum TLS when /tls/server/ca.crt exists
+              readOnly: true
+            - name: zookeeper-client-tls
+              mountPath: /tls/client  # the run script and both probe scripts switch to TLS when /tls/client/ca.crt exists
+              readOnly: true
+            - name: config
+              mountPath: /config-scripts
+      volumes:
+        - name: config
+          configMap:
+            name: zookeeper
+            defaultMode: 0555  # leading-zero literal; intended as octal r-xr-xr-x, which depends on the YAML loader treating it as octal
+        - name: zookeeper-server-tls
+          secret:
+            secretName: zookeeper-server-tls
+        - name: zookeeper-client-tls
+          secret:
+            secretName: zookeeper-server-tls  # same secret as the server volume, matching clientTlsSecret in values.yaml
+  volumeClaimTemplates:
+    - metadata:
+        name: data
+      spec:
+        accessModes:
+          - "ReadWriteOnce"
+        resources:
+          requests:
+            storage: "5Gi"
diff --git a/k8s/zuul.yaml b/k8s/zuul.yaml
index 1cd88e1..275039e 100644
--- a/k8s/zuul.yaml
+++ b/k8s/zuul.yaml
@@ -1,4 +1,17 @@
 ---
+apiVersion: cert-manager.io/v1alpha2
+kind: Certificate
+metadata:
+  name: zookeeper-client
+  namespace: zuul
+spec:
+  keyEncoding: pkcs8  # same key encoding as the zookeeper-server certificate in k8s/zookeeper/certs.yaml
+  secretName: zookeeper-client-tls  # mounted at /tls/client by the volume entries added further down in this file
+  commonName: client
+  issuerRef:  # same ClusterIssuer as the zookeeper-server certificate
+    name: ca-issuer
+    kind: ClusterIssuer
+---
 apiVersion: cloud.google.com/v1beta1
 kind: BackendConfig
 metadata:
@@ -114,10 +127,16 @@
         volumeMounts:
         - name: zuul-config
           mountPath: /etc/zuul
+        - name: zookeeper-client-tls
+          mountPath: /tls/client
+          readOnly: true
       volumes:
       - name: zuul-config
         secret:
           secretName: zuul-config
+      - name: zookeeper-client-tls
+        secret:
+          secretName: zookeeper-client-tls
 ---
 apiVersion: apps/v1
 kind: Deployment
@@ -157,10 +176,16 @@
         volumeMounts:
         - name: zuul-config
           mountPath: /etc/zuul
+        - name: zookeeper-client-tls
+          mountPath: /tls/client
+          readOnly: true
       volumes:
       - name: zuul-config
         secret:
           secretName: zuul-config
+      - name: zookeeper-client-tls
+        secret:
+          secretName: zookeeper-client-tls
 ---
 apiVersion: apps/v1
 kind: StatefulSet
@@ -210,6 +235,9 @@
           mountPath: /var/lib/zuul
         - name: nodepool-private-key
           mountPath: /var/lib/zuul/ssh
+        - name: zookeeper-client-tls
+          mountPath: /tls/client
+          readOnly: true
         securityContext:
           privileged: true
       volumes:
@@ -225,6 +253,9 @@
       - name: nodepool-private-key
         secret:
           secretName: nodepool-private-key
+      - name: zookeeper-client-tls
+        secret:
+          secretName: zookeeper-client-tls
 ---
 apiVersion: apps/v1
 kind: StatefulSet
@@ -269,6 +300,9 @@
           mountPath: /etc/zuul/tenant
         - name: zuul-scheduler
           mountPath: /var/lib/zuul
+        - name: zookeeper-client-tls
+          mountPath: /tls/client
+          readOnly: true
       volumes:
       - name: zuul-config
         secret:
@@ -276,6 +310,9 @@
       - name: zuul-tenant-config
         secret:
           secretName: zuul-tenant-config
+      - name: zookeeper-client-tls
+        secret:
+          secretName: zookeeper-client-tls
       serviceAccountName: zuul
   volumeClaimTemplates:
   - metadata:
diff --git a/nodepool/nodepool.yaml b/nodepool/nodepool.yaml
index b2f3a6e..6b2bc47 100644
--- a/nodepool/nodepool.yaml
+++ b/nodepool/nodepool.yaml
@@ -1,7 +1,11 @@
 diskimages: []
 zookeeper-servers:
   - host: zookeeper.zookeeper
-    port: 2181
+    port: 2281
+zookeeper-tls:
+  ca: /tls/client/ca.crt
+  cert: /tls/client/tls.crt
+  key: /tls/client/tls.key
 providers:
   - name: gcloud-provider
     driver: gce
diff --git a/playbooks/deploy.yaml b/playbooks/deploy.yaml
index 073d283..9495de8 100644
--- a/playbooks/deploy.yaml
+++ b/playbooks/deploy.yaml
@@ -19,6 +19,11 @@
         state: present
         src: "{{ root }}/k8s/certmanager.yaml"
 
+    - name: Update Zookeeper deployment
+      k8s:
+        state: present  # NOTE(review): no `namespace:` is set and the objects in zookeeper.yaml carry no metadata.namespace — confirm they land in the "zookeeper" namespace
+        src: "{{ root }}/k8s/zookeeper/zookeeper.yaml"  # NOTE(review): k8s/zookeeper/certs.yaml (namespace + server cert) is not applied by this task — confirm it is applied elsewhere
+
     - name: Update Letsencrypt configuration
       k8s:
         state: present
diff --git a/zuul/zuul.conf b/zuul/zuul.conf
index 31b34fc..e770e93 100644
--- a/zuul/zuul.conf
+++ b/zuul/zuul.conf
@@ -3,7 +3,10 @@
 port=4730
 
 [zookeeper]
-hosts=zookeeper.zookeeper
+hosts=zookeeper.zookeeper:2281
+tls_ca=/tls/client/ca.crt
+tls_cert=/tls/client/tls.crt
+tls_key=/tls/client/tls.key
 
 [gearman_server]
 start=true