Parcourir la source

implemented HA for helm

Jean-Baptiste Blanc il y a 4 ans
Parent
commit
4034e127cd
6 fichiers modifiés avec 44 ajouts et 25 suppressions
  1. BIN
      .DS_Store
  2. 12 6
      flink/README.md
  3. BIN
      flink/charts/zookeeper-2.1.0.tgz
  4. 6 0
      flink/requirements.lock
  5. 1 1
      flink/requirements.yaml
  6. 25 18
      flink/values.yaml

BIN
.DS_Store


+ 12 - 6
flink/README.md

@@ -57,12 +57,12 @@ following configurable parameters(other parameters can be found in values.yaml):
 | Parameter                                | Description                                                                                                                                                              | Default                |
 | Parameter                                | Description                                                                                                                                                              | Default                |
 |------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|
 |------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|
 | `image.repository`                       | Flink Container image name                                                                                                                                               | `flink`                |
 | `image.repository`                       | Flink Container image name                                                                                                                                               | `flink`                |
-| `image.tag`                              | Flink Container image tag                                                                                                                                                | `1.11.2-scala_2.12`    |
+| `image.tag`                              | Flink Container image tag                                                                                                                                                | `1.12.1-scala_2.12-java11`    |
 | `image.PullPolicy`                       | Flink Containers pull policy                                                                                                                                             | `IfNotPresent`         |
 | `image.PullPolicy`                       | Flink Containers pull policy                                                                                                                                             | `IfNotPresent`         |
 | `flink.monitoring.enabled`               | Enables Flink monitoring                                                                                                                                                 | `true`                 |
 | `flink.monitoring.enabled`               | Enables Flink monitoring                                                                                                                                                 | `true`                 |
-| `jobmanager.highAvailability.enabled`    | Enables Jobmanager HA mode key                                                                                                                                           | `false`                |
+| `jobmanager.highAvailability.enabled`    | Enables Jobmanager HA mode key                                                                                                                                           | `true`                |
 | `jobmanager.highAvailability.storageDir` | storageDir for Jobmanager in HA mode                                                                                                                                     | `null`                 |
 | `jobmanager.highAvailability.storageDir` | storageDir for Jobmanager in HA mode                                                                                                                                     | `null`                 |
-| `jobmanager.replicaCount`                | Jobmanagers count context                                                                                                                                                | `1`                    |
+| `jobmanager.replicaCount`                | Jobmanagers count context                                                                                                                                                | `2`                    |
 | `jobmanager.heapSize`                    | Jobmanager HeapSize options                                                                                                                                              | `1g`                   |
 | `jobmanager.heapSize`                    | Jobmanager HeapSize options                                                                                                                                              | `1g`                   |
 | `jobmanager.resources`                   | Jobmanager resources                                                                                                                                                     | `{}`                   |
 | `jobmanager.resources`                   | Jobmanager resources                                                                                                                                                     | `{}`                   |
 | `taskmanager.resources`                  | Taskmanager Resources key                                                                                                                                                | `{}`                   |
 | `taskmanager.resources`                  | Taskmanager Resources key                                                                                                                                                | `{}`                   |
@@ -70,7 +70,7 @@ following configurable parameters(other parameters can be found in values.yaml):
 | `taskmanager.replicaCount`               | Taskmanager count context                                                                                                                                                | `1`                    |
 | `taskmanager.replicaCount`               | Taskmanager count context                                                                                                                                                | `1`                    |
 | `taskmanager.numberOfTaskSlots`          | Number of Taskmanager taskSlots resources                                                                                                                                | `1`                    |
 | `taskmanager.numberOfTaskSlots`          | Number of Taskmanager taskSlots resources                                                                                                                                | `1`                    |
 | `taskmanager.resources`                  | Taskmanager resources                                                                                                                                                    | `{}`                   |
 | `taskmanager.resources`                  | Taskmanager resources                                                                                                                                                    | `{}`                   |
-| `zookeeper.enabled`                      | If True, installs Zookeeper Chart                                                                                                                                        | `false`                |
+| `zookeeper.enabled`                      | If True, installs Zookeeper Chart                                                                                                                                        | `true`                |
 | `zookeeper.resources`                    | Zookeeper resource requests and limits                                                                                                                                   | `{}`                   |
 | `zookeeper.resources`                    | Zookeeper resource requests and limits                                                                                                                                   | `{}`                   |
 | `zookeeper.env`                          | Environmental variables provided to Zookeeper                                                                                                                            | `{ZK_HEAP_SIZE: "1G"}` |
 | `zookeeper.env`                          | Environmental variables provided to Zookeeper                                                                                                                            | `{ZK_HEAP_SIZE: "1G"}` |
 | `zookeeper.storage`                      | Zookeeper Persistent volume size                                                                                                                                         | `2Gi`                  |
 | `zookeeper.storage`                      | Zookeeper Persistent volume size                                                                                                                                         | `2Gi`                  |
@@ -85,8 +85,14 @@ following configurable parameters(other parameters can be found in values.yaml):
 
 
 You can install this chart with Zookeeper-based HA enabled by providing the following parameters:
 You can install this chart with Zookeeper-based HA enabled by providing the following parameters:
 ```
 ```
-$ helm install --name my-flink riskfoucs/flink --set \
-zookeeper.enabled=true,jobmanager.replicaCount=2,jobmanager.highAvailability.enabled=true,jobmanager.highAvailability.storageDir=s3://MY_BUCKET/flink/jobmanager
+$ helm install --name my-flink riskfocus/flink --set jobmanager.highAvailability.storageDir=s3p://<s3_bucket>/,state.checkpoints.dir=s3p://<s3_bucket>/flink_state/checkpoints,state.savepoints.dir=s3p://<s3_bucket>/flink_state/savepoints
 ```
 ```
 * storageDir can be different for your installation, see 
 * storageDir can be different for your installation, see 
   https://ci.apache.org/projects/flink/flink-docs-stable/ops/config.html#high-availability-storagedir
   https://ci.apache.org/projects/flink/flink-docs-stable/ops/config.html#high-availability-storagedir
+
+
+
+
+
+
+--set jobmanager.highAvailability.storageDir=s3p://rf-flink-cluster-us-e-destination-s3/flink_state/recovery,state.checkpoints.dir=s3p://rf-flink-cluster-us-e-destination-s3/flink_state/checkpoints,state.savepoints.dir=s3p://rf-flink-cluster-us-e-destination-s3/flink_state/savepoints

BIN
flink/charts/zookeeper-2.1.0.tgz


+ 6 - 0
flink/requirements.lock

@@ -0,0 +1,6 @@
+dependencies:
+- name: zookeeper
+  repository: https://charts.helm.sh/incubator
+  version: 2.1.0
+digest: sha256:879d24b28bd414d66e5fd98e0676010e2b857fde3a10110a7aef448d57bc059e
+generated: "2021-02-05T13:45:38.165747+01:00"

+ 1 - 1
flink/requirements.yaml

@@ -1,6 +1,6 @@
 dependencies:
 dependencies:
   - name:       "zookeeper"
   - name:       "zookeeper"
-    version:    "2.0.0"
+    version:    "2.1.0"
     repository: "https://charts.helm.sh/incubator"
     repository: "https://charts.helm.sh/incubator"
     condition:  "zookeeper.enabled"
     condition:  "zookeeper.enabled"
 
 

+ 25 - 18
flink/values.yaml

@@ -7,7 +7,7 @@ fullnameOverride: ""
 
 
 image:
 image:
   repository: flink
   repository: flink
-  tag: 1.11.2-scala_2.12
+  tag: 1.12.1-scala_2.12-java11
   pullPolicy: IfNotPresent
   pullPolicy: IfNotPresent
 imagePullSecrets: []
 imagePullSecrets: []
 
 
@@ -83,19 +83,19 @@ flink:
   params: ""
   params: ""
   state:
   state:
     # backend for state. Available options: filesystem, rocksdb, memory; empty - for default(memory)
     # backend for state. Available options: filesystem, rocksdb, memory; empty - for default(memory)
-    backend:
+    backend: filesystem
     # These values are the Flink defaults, except for the file paths
     # These values are the Flink defaults, except for the file paths
     # https://ci.apache.org/projects/flink/flink-docs-stable/dev/stream/state/checkpointing.html#related-config-options
     # https://ci.apache.org/projects/flink/flink-docs-stable/dev/stream/state/checkpointing.html#related-config-options
     params: |+
     params: |+
-      state.checkpoints.dir: file:///flink_state/checkpoints
-      state.savepoints.dir: file:///flink_state/savepoints
+      state.checkpoints.dir: s3p://<s3_bucket>/flink_state/checkpoints
+      state.savepoints.dir: s3p://<s3_bucket>/flink_state/savepoints
       state.backend.async: true
       state.backend.async: true
       state.backend.fs.memory-threshold: 1024
       state.backend.fs.memory-threshold: 1024
       state.backend.fs.write-buffer-size: 4096
       state.backend.fs.write-buffer-size: 4096
       state.backend.incremental: false
       state.backend.incremental: false
       state.backend.local-recovery: false
       state.backend.local-recovery: false
-      state.checkpoints.num-retained: 1
-      taskmanager.state.local.root-dirs: file:///flink_state/local-recovery
+      state.checkpoints.num-retained: 30
+      taskmanager.state.local.root-dirs: s3p://<s3_bucket>/flink_state/local-recovery
     # https://ci.apache.org/projects/flink/flink-docs-stable/ops/state/state_backends.html#rocksdb-state-backend-config-options
     # https://ci.apache.org/projects/flink/flink-docs-stable/ops/state/state_backends.html#rocksdb-state-backend-config-options
     # * state.backend.rocksdb.localdir doesn't have a prefix - file://
     # * state.backend.rocksdb.localdir doesn't have a prefix - file://
     rocksdb: |+
     rocksdb: |+
@@ -129,13 +129,13 @@ jobmanager:
   # highAvailability configuration based on zookeeper
   # highAvailability configuration based on zookeeper
   highAvailability:
   highAvailability:
     # enabled also will enable zookeeper Dependency
     # enabled also will enable zookeeper Dependency
-    enabled: false
+    enabled: true
     zookeeperConnect: "{{ .Release.Name }}-zookeeper:{{ .Values.zookeeper.env.ZOO_PORT }}"
     zookeeperConnect: "{{ .Release.Name }}-zookeeper:{{ .Values.zookeeper.env.ZOO_PORT }}"
     zookeeperRootPath: /flink
     zookeeperRootPath: /flink
-    clusterId: /flink
+    clusterId: <cluster-id>
     # storageDir for Jobmanagers. DFS expected.
     # storageDir for Jobmanagers. DFS expected.
     # Docs - Storage directory (required): JobManager metadata is persisted in the file system storageDir and only a pointer to this state is stored in ZooKeeper
     # Docs - Storage directory (required): JobManager metadata is persisted in the file system storageDir and only a pointer to this state is stored in ZooKeeper
-    storageDir:
+    storageDir: s3p://<s3_bucket>/flink_state/recovery
     # syncPort is a rpc port in HA configuration
     # syncPort is a rpc port in HA configuration
     syncPort: 6150
     syncPort: 6150
     # command for HA configuration
     # command for HA configuration
@@ -156,7 +156,7 @@ jobmanager:
     # blob port uses for Liveness probe
     # blob port uses for Liveness probe
     blob: 6124
     blob: 6124
     ui: 8081
     ui: 8081
-  replicaCount: 1
+  replicaCount: 2
   # heapSize params for Jobmanager
   # heapSize params for Jobmanager
   # keep in mind that Flink can use offheap memory
   # keep in mind that Flink can use offheap memory
   # e.g. in case of checkpoint usage
   # e.g. in case of checkpoint usage
@@ -167,8 +167,11 @@ jobmanager:
   #      cpu: 3800m
   #      cpu: 3800m
   #      memory: 8000Mi
   #      memory: 8000Mi
   additionalCommand: >-
   additionalCommand: >-
-    cp /opt/flink/opt/flink-s3-fs-presto-*.jar
-    /opt/flink/lib/ &&
+    apt-get update && 
+    apt-get -y install awscli &&
+    mkdir -p /opt/flink/plugins/s3-fs-presto/ &&
+    cp /opt/flink/opt/flink-s3-fs-presto-1.12.1.jar /opt/flink/lib/flink-s3-fs-presto-1.12.1.jar &&
+    cp /opt/flink/opt/flink-s3-fs-presto-1.12.1.jar /opt/flink/plugins/s3-fs-presto/flink-s3-fs-presto-1.12.1.jar &&
     wget https://repo1.maven.org/maven2/com/github/oshi/oshi-core/3.4.0/oshi-core-3.4.0.jar
     wget https://repo1.maven.org/maven2/com/github/oshi/oshi-core/3.4.0/oshi-core-3.4.0.jar
     -O /opt/flink/lib/oshi-core-3.4.0.jar &&
     -O /opt/flink/lib/oshi-core-3.4.0.jar &&
     wget https://repo1.maven.org/maven2/net/java/dev/jna/jna/5.4.0/jna-5.4.0.jar
     wget https://repo1.maven.org/maven2/net/java/dev/jna/jna/5.4.0/jna-5.4.0.jar
@@ -179,8 +182,9 @@ jobmanager:
     cp $FLINK_HOME/conf/flink-conf.yaml.tpl $FLINK_HOME/conf/flink-conf.yaml &&
     cp $FLINK_HOME/conf/flink-conf.yaml.tpl $FLINK_HOME/conf/flink-conf.yaml &&
     $FLINK_HOME/bin/jobmanager.sh start-foreground;
     $FLINK_HOME/bin/jobmanager.sh start-foreground;
   service:
   service:
-    type: ClusterIP
-    annotations: {}
+    type: LoadBalancer
+    annotations: 
+      service.beta.kubernetes.io/aws-load-balancer-internal: "true"
     # rest is additional service which exposes only HTTP port
     # rest is additional service which exposes only HTTP port
     # can be using for cases of using exposeController
     # can be using for cases of using exposeController
     rest:
     rest:
@@ -238,8 +242,11 @@ taskmanager:
   #      cpu: 3800m
   #      cpu: 3800m
   #      memory: 8000Mi
   #      memory: 8000Mi
   additionalCommand: >-
   additionalCommand: >-
-    cp /opt/flink/opt/flink-s3-fs-presto-*.jar
-    /opt/flink/lib/ &&
+    apt-get update && 
+    apt-get -y install awscli &&
+    mkdir -p /opt/flink/plugins/s3-fs-presto/ &&
+    cp /opt/flink/opt/flink-s3-fs-presto-1.12.1.jar /opt/flink/lib/flink-s3-fs-presto-1.12.1.jar &&
+    cp /opt/flink/opt/flink-s3-fs-presto-1.12.1.jar /opt/flink/plugins/s3-fs-presto/flink-s3-fs-presto-1.12.1.jar &&
     wget https://repo1.maven.org/maven2/com/github/oshi/oshi-core/3.4.0/oshi-core-3.4.0.jar
     wget https://repo1.maven.org/maven2/com/github/oshi/oshi-core/3.4.0/oshi-core-3.4.0.jar
     -O /opt/flink/lib/oshi-core-3.4.0.jar &&
     -O /opt/flink/lib/oshi-core-3.4.0.jar &&
     wget https://repo1.maven.org/maven2/net/java/dev/jna/jna/5.4.0/jna-5.4.0.jar
     wget https://repo1.maven.org/maven2/net/java/dev/jna/jna/5.4.0/jna-5.4.0.jar
@@ -293,8 +300,8 @@ prometheus:
       prometheus: kube-prometheus
       prometheus: kube-prometheus
 
 
 zookeeper:
 zookeeper:
-  enabled: false
-  replicaCount: 3
+  enabled: true
+  replicaCount: 2
   env:
   env:
     ZK_HEAP_SIZE: "1G"
     ZK_HEAP_SIZE: "1G"
     ZOO_PORT: 2181
     ZOO_PORT: 2181