diff --git a/README.md b/README.md index a38aa360f064078a970314543f05192f2df9af6b..6ad1dd576bc165630fb378234102f324f9b66d8a 100644 --- a/README.md +++ b/README.md @@ -2,4 +2,18 @@ > A theodolite is a precision optical instrument for measuring angles between designated visible points in the horizontal and vertical planes. -- <cite>[Wikipedia](https://en.wikipedia.org/wiki/Theodolite)</cite> -Theodolite is a framework for benchmarking the horizontal and vertical scalability of stream processing engines. +Theodolite is a framework for benchmarking the horizontal and vertical scalability of stream processing engines. It consists of three modules: + +## Theodolite Benchmarks + +Theodolite contains 4 application benchmarks, which are based on typical use cases for stream processing within microservices. For each benchmark, a corresponding workload generator is provided. Currently, this repository provides benchmark implementations for Kafka Streams. + + +## Theodolite Execution Framework + +Theodolite aims to benchmark scalability of stream processing engines for real use cases. Microservices that apply stream processing techniques are usually deployed in elastic cloud environments. Hence, Theodolite's cloud-native benchmarking framework deploys its components in a cloud environment, orchestrated by Kubernetes. More information on how to execute scalability benchmarks can be found in [Theodolite execution framework](execution). + + +## Theodolite Analysis Tools + +Theodolite's benchmarking method creates a *scalability graph* that allows drawing conclusions about the scalability of a stream processing engine or its deployment. A scalability graph shows how resource demand evolves with an increasing workload. Theodolite provides Jupyter notebooks for creating such scalability graphs based on benchmarking results from the execution framework. More information can be found in [Theodolite analysis tool](analysis). 
diff --git a/analysis/README.md b/analysis/README.md index 5318425825a51b4ab118bb1c6c11dfc92037c6a1..263b1db16fcabefe5409ebe744afe5997bc90d89 100644 --- a/analysis/README.md +++ b/analysis/README.md @@ -17,6 +17,6 @@ Python libraries, which can be installed via: pip install -r requirements.txt ``` - We have tested these +We have tested these notebooks with [Visual Studio Code](https://code.visualstudio.com/docs/python/jupyter-support), however, every other server should be fine as well. diff --git a/execution/.gitignore b/execution/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..d4dceff0274cd6ab3296e85e995f7e5d504f114d --- /dev/null +++ b/execution/.gitignore @@ -0,0 +1 @@ +exp_counter.txt \ No newline at end of file diff --git a/execution/README.md b/execution/README.md index 7dd9bf5d13c1eb578d67f03aefdbb7a47558e533..be24298b0ece20271f66c6750b5bf7eeb9177ab0 100644 --- a/execution/README.md +++ b/execution/README.md @@ -95,7 +95,15 @@ kubectl apply -f infrastructure/kafka/service-monitor.yaml ##### Other options for Kafka -Other Kafka deployments, for example, using Strimzi, should work in similiar way. +Other Kafka deployments, for example, using Strimzi, should work in a similar way. 
+ +#### A Kafka Client Pod + +A permanently running pod used for Kafka configuration is started via: + +```sh +kubectl apply -f infrastructure/kafka/kafka-client.yaml +``` #### The Kafka Lag Exporter diff --git a/execution/execution.sh b/execution/execution.sh deleted file mode 100755 index 0a1ead95049564b9d88f35d40ea622788119e4dc..0000000000000000000000000000000000000000 --- a/execution/execution.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -./run_loop.sh 1 "25000 50000 75000 100000 125000 150000" "1 2 3 4 5" 40 #6*5=3Std -sleep 5m -./run_loop.sh 2 "6 7 8 9" "1 2 3 4 6 8 10 12 14 16 18 20" 40 #4*12=5Std -sleep 5m -./run_loop.sh 3 "25000 50000 75000 100000 125000 150000" "1 2 3 4 5 6" 40 #6*6=3.5Std -sleep 5m -./run_loop.sh 4 "25000 50000 75000 100000 125000 150000" "1 2 4 6 8 10 12 14 16 18 20 30 40 50 60 70 80 90" 40 #6*18=11Std -sleep 5m - -./run_loop.sh 1 "25000 50000 75000 100000 125000 150000" "1 2 3 4 5" 400 #6*5=3Std -sleep 5m -./run_loop.sh 2 "6 7 8 9" "1 2 3 4 6 8 10 12 14 16 18 20" 400 #4*12=5Std -sleep 5m -./run_loop.sh 3 "25000 50000 75000 100000 125000 150000" "1 2 3 4 5 6" 400 #6*6=3.5Std -sleep 5m -./run_loop.sh 4 "25000 50000 75000 100000 125000 150000" "1 2 4 6 8 10 12 14 16 18 20 30 40 50 60 70 80 90" 400 #6*18=11Std -sleep 5m -./run_loop.sh 4 "150000" "100 110 120 130 140 150 160 17 18 190 200" 400 #6*18=11Std -sleep 5m -# For commit interval evaluation -./run_loop.sh 4 "5000 10000 15000 20000 25000 30000" "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15" 160 \ No newline at end of file diff --git a/execution/execution_tmp_200507.sh b/execution/execution_tmp_200507.sh deleted file mode 100644 index 932940ae78dc5e5f0d2362da1047329a22713f51..0000000000000000000000000000000000000000 --- a/execution/execution_tmp_200507.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -#./run_loop.sh 1 "50000 100000 150000 200000 250000 300000" "1 2 3 4 5" 40 #3Std -./run_loop.sh 1 "200000 250000 300000" "1 2 3 4 5" 40 1000m 4Gi 100 5 #1.5Std -sleep 1m -#./run_loop.sh 1 
"50000 100000 150000 200000 250000 300000" "1 2 3 4 5" 400 #3Std -./run_loop.sh 1 "200000 250000 300000" "1 2 3 4 5" 400 1000m 4Gi 100 5 #1.5Std -sleep 1m - -#./run_loop.sh 3 "50000 100000 150000 200000 250000 300000" "1 2 3 4 5 6 7 8 9 10" 40 #6 Std -./run_loop.sh 3 "200000 250000 300000" "1 2 3 4 5 6 7 8 9 10" 40 1000m 4Gi 100 5 #3 Std -sleep 1m -#./run_loop.sh 3 "50000 100000 150000 200000 250000 300000" "1 2 3 4 5 6 7 8 9 10" 400 #6 Std -./run_loop.sh 3 "200000 250000 300000" "1 2 3 4 5 6 7 8 9 10" 400 1000m 4Gi 100 5 #3 Std -sleep 1m - -./run_loop.sh 1 "50000 100000 150000 200000 250000 300000" "1 2 3 4 5" 40 500m 2Gi 100 5 #3Std diff --git a/execution/exp_counter.txt b/execution/exp_counter.txt index 573541ac9702dd3969c9bc859d2b91ec1f7e6e56..d00491fd7e5bb6fa28c517a0bb32b8b506539d4d 100644 --- a/execution/exp_counter.txt +++ b/execution/exp_counter.txt @@ -1 +1 @@ -0 +1 diff --git a/execution/infrastructure/kafka/kafka-client.yaml b/execution/infrastructure/kafka/kafka-client.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c7d3ed239faed62022c110e92b264b338a8c9a4 --- /dev/null +++ b/execution/infrastructure/kafka/kafka-client.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Pod +metadata: + name: kafka-client-2 +spec: + containers: + - name: kafka-client + image: confluentinc/cp-enterprise-kafka:5.4.0 + command: + - sh + - -c + - "exec tail -f /dev/null" \ No newline at end of file diff --git a/execution/run_uc1.sh b/execution/run_uc1.sh index c54780d59243c523ae1e00abd9ea62970dfa97b4..59298612224386c2bd4efd62e4fa91c452a55604 100755 --- a/execution/run_uc1.sh +++ b/execution/run_uc1.sh @@ -34,7 +34,11 @@ echo "$WORKLOAD_GENERATOR_YAML" | kubectl apply -f - # Start application REPLICAS=$INSTANCES -#kubectl apply -f uc3-application/aggregation-deployment.yaml +# When not using `sed` anymore, use `kubectl apply -f uc1-application` +kubectl apply -f uc1-application/aggregation-service.yaml +kubectl apply -f uc1-application/jmx-configmap.yaml 
+kubectl apply -f uc1-application/service-monitor.yaml +#kubectl apply -f uc1-application/aggregation-deployment.yaml APPLICATION_YAML=$(sed "s/{{CPU_LIMIT}}/$CPU_LIMIT/g; s/{{MEMORY_LIMIT}}/$MEMORY_LIMIT/g; s/{{KAFKA_STREAMS_COMMIT_INTERVAL_MS}}/$KAFKA_STREAMS_COMMIT_INTERVAL_MS/g" uc1-application/aggregation-deployment.yaml) echo "$APPLICATION_YAML" | kubectl apply -f - kubectl scale deployment titan-ccp-aggregation --replicas=$REPLICAS @@ -52,6 +56,9 @@ deactivate #sed "s/{{INSTANCES}}/1/g" uc1-workload-generator/deployment.yaml | kubectl delete -f - #sed "s/{{NUM_SENSORS}}/$NUM_SENSORS/g; s/{{INSTANCES}}/$WL_INSTANCES/g" uc1-workload-generator/deployment.yaml | kubectl delete -f - echo "$WORKLOAD_GENERATOR_YAML" | kubectl delete -f - +kubectl delete -f uc1-application/aggregation-service.yaml +kubectl delete -f uc1-application/jmx-configmap.yaml +kubectl delete -f uc1-application/service-monitor.yaml #kubectl delete -f uc1-application/aggregation-deployment.yaml echo "$APPLICATION_YAML" | kubectl delete -f - diff --git a/execution/run_uc2.sh b/execution/run_uc2.sh index 5e2f93076f430510245836740c32a083e622f04a..e1571bf33ced96f94b261d4ab07af41338c4d88c 100755 --- a/execution/run_uc2.sh +++ b/execution/run_uc2.sh @@ -30,6 +30,10 @@ sed "s/{{NUM_NESTED_GROUPS}}/$NUM_NESTED_GROUPS/g" uc2-workload-generator/deploy # Start application REPLICAS=$INSTANCES +# When not using `sed` anymore, use `kubectl apply -f uc2-application` +kubectl apply -f uc2-application/aggregation-service.yaml +kubectl apply -f uc2-application/jmx-configmap.yaml +kubectl apply -f uc2-application/service-monitor.yaml #kubectl apply -f uc2-application/aggregation-deployment.yaml APPLICATION_YAML=$(sed "s/{{CPU_LIMIT}}/$CPU_LIMIT/g; s/{{MEMORY_LIMIT}}/$MEMORY_LIMIT/g; s/{{KAFKA_STREAMS_COMMIT_INTERVAL_MS}}/$KAFKA_STREAMS_COMMIT_INTERVAL_MS/g" uc2-application/aggregation-deployment.yaml) echo "$APPLICATION_YAML" | kubectl apply -f - @@ -45,6 +49,9 @@ deactivate # Stop wl and app kubectl delete -f 
uc2-workload-generator/deployment.yaml +kubectl delete -f uc2-application/aggregation-service.yaml +kubectl delete -f uc2-application/jmx-configmap.yaml +kubectl delete -f uc2-application/service-monitor.yaml #kubectl delete -f uc2-application/aggregation-deployment.yaml echo "$APPLICATION_YAML" | kubectl delete -f - diff --git a/execution/run_uc3.sh b/execution/run_uc3.sh index ce2862202d633180255dd4f43d6327476b58a17e..99d005858b165749a861a908bb0e4e2b9637ed10 100755 --- a/execution/run_uc3.sh +++ b/execution/run_uc3.sh @@ -34,6 +34,10 @@ echo "$WORKLOAD_GENERATOR_YAML" | kubectl apply -f - # Start application REPLICAS=$INSTANCES +# When not using `sed` anymore, use `kubectl apply -f uc3-application` +kubectl apply -f uc3-application/aggregation-service.yaml +kubectl apply -f uc3-application/jmx-configmap.yaml +kubectl apply -f uc3-application/service-monitor.yaml #kubectl apply -f uc3-application/aggregation-deployment.yaml APPLICATION_YAML=$(sed "s/{{CPU_LIMIT}}/$CPU_LIMIT/g; s/{{MEMORY_LIMIT}}/$MEMORY_LIMIT/g; s/{{KAFKA_STREAMS_COMMIT_INTERVAL_MS}}/$KAFKA_STREAMS_COMMIT_INTERVAL_MS/g" uc3-application/aggregation-deployment.yaml) echo "$APPLICATION_YAML" | kubectl apply -f - @@ -51,7 +55,10 @@ deactivate #kubectl delete -f uc3-workload-generator/deployment.yaml #sed "s/{{INSTANCES}}/1/g" uc3-workload-generator/deployment.yaml | kubectl delete -f - echo "$WORKLOAD_GENERATOR_YAML" | kubectl delete -f - -#kubectl delete -f uc1-application/aggregation-deployment.yaml +kubectl delete -f uc3-application/aggregation-service.yaml +kubectl delete -f uc3-application/jmx-configmap.yaml +kubectl delete -f uc3-application/service-monitor.yaml +#kubectl delete -f uc3-application/aggregation-deployment.yaml #sed "s/{{CPU_LIMIT}}/1000m/g; s/{{MEMORY_LIMIT}}/4Gi/g; s/{{KAFKA_STREAMS_COMMIT_INTERVAL_MS}}/100/g" uc3-application/aggregation-deployment.yaml | kubectl delete -f - echo "$APPLICATION_YAML" | kubectl delete -f - diff --git a/execution/run_uc4.sh b/execution/run_uc4.sh 
index 5ae4cf769e5dec2be8660505f9a923506bb1f679..5864aec8433c65fa6ff9456801f38bd1604883a2 100755 --- a/execution/run_uc4.sh +++ b/execution/run_uc4.sh @@ -32,6 +32,10 @@ sed "s/{{NUM_SENSORS}}/$NUM_SENSORS/g" uc4-workload-generator/deployment.yaml | # Start application REPLICAS=$INSTANCES #AGGREGATION_DURATION_DAYS=$DIM_VALUE +# When not using `sed` anymore, use `kubectl apply -f uc4-application` +kubectl apply -f uc4-application/aggregation-service.yaml +kubectl apply -f uc4-application/jmx-configmap.yaml +kubectl apply -f uc4-application/service-monitor.yaml #kubectl apply -f uc4-application/aggregation-deployment.yaml #sed "s/{{AGGREGATION_DURATION_DAYS}}/$AGGREGATION_DURATION_DAYS/g" uc4-application/aggregation-deployment.yaml | kubectl apply -f - APPLICATION_YAML=$(sed "s/{{CPU_LIMIT}}/$CPU_LIMIT/g; s/{{MEMORY_LIMIT}}/$MEMORY_LIMIT/g; s/{{KAFKA_STREAMS_COMMIT_INTERVAL_MS}}/$KAFKA_STREAMS_COMMIT_INTERVAL_MS/g" uc4-application/aggregation-deployment.yaml) @@ -48,6 +52,9 @@ deactivate # Stop wl and app kubectl delete -f uc4-workload-generator/deployment.yaml +kubectl delete -f uc4-application/aggregation-service.yaml +kubectl delete -f uc4-application/jmx-configmap.yaml +kubectl delete -f uc4-application/service-monitor.yaml #kubectl delete -f uc4-application/aggregation-deployment.yaml echo "$APPLICATION_YAML" | kubectl delete -f - diff --git a/execution/theodolite.sh b/execution/theodolite.sh index 196662ffcd968cd732d1e12ce79e7639cb12cd55..18a6b67a9c321cd1c0ecebca405169ec5b8ade46 100755 --- a/execution/theodolite.sh +++ b/execution/theodolite.sh @@ -10,7 +10,7 @@ KAFKA_STREAMS_COMMIT_INTERVAL_MS=${7:-100} EXECUTION_MINUTES=${8:-5} # Get and increment counter -EXP_ID=$(cat exp_counter.txt) +EXP_ID=$(cat exp_counter.txt 2>/dev/null || echo "0") echo $((EXP_ID+1)) > exp_counter.txt # Store meta information diff --git a/execution/uc1-workload-generator/jmx-configmap.yaml b/execution/uc1-workload-generator/jmx-configmap.yaml deleted file mode 100644 index 
ea16037d74978a9273936c26eb06420983dd3139..0000000000000000000000000000000000000000 --- a/execution/uc1-workload-generator/jmx-configmap.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: load-generator-jmx-configmap -data: - jmx-kafka-prometheus.yml: |+ - jmxUrl: service:jmx:rmi:///jndi/rmi://localhost:5555/jmxrmi - lowercaseOutputName: true - lowercaseOutputLabelNames: true - ssl: false diff --git a/execution/uc2-workload-generator/jmx-configmap.yaml b/execution/uc2-workload-generator/jmx-configmap.yaml deleted file mode 100644 index ea16037d74978a9273936c26eb06420983dd3139..0000000000000000000000000000000000000000 --- a/execution/uc2-workload-generator/jmx-configmap.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: load-generator-jmx-configmap -data: - jmx-kafka-prometheus.yml: |+ - jmxUrl: service:jmx:rmi:///jndi/rmi://localhost:5555/jmxrmi - lowercaseOutputName: true - lowercaseOutputLabelNames: true - ssl: false diff --git a/execution/uc3-workload-generator/jmx-configmap.yaml b/execution/uc3-workload-generator/jmx-configmap.yaml deleted file mode 100644 index ea16037d74978a9273936c26eb06420983dd3139..0000000000000000000000000000000000000000 --- a/execution/uc3-workload-generator/jmx-configmap.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: load-generator-jmx-configmap -data: - jmx-kafka-prometheus.yml: |+ - jmxUrl: service:jmx:rmi:///jndi/rmi://localhost:5555/jmxrmi - lowercaseOutputName: true - lowercaseOutputLabelNames: true - ssl: false diff --git a/execution/uc4-workload-generator/jmx-configmap.yaml b/execution/uc4-workload-generator/jmx-configmap.yaml deleted file mode 100644 index ea16037d74978a9273936c26eb06420983dd3139..0000000000000000000000000000000000000000 --- a/execution/uc4-workload-generator/jmx-configmap.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: load-generator-jmx-configmap -data: - 
jmx-kafka-prometheus.yml: |+ - jmxUrl: service:jmx:rmi:///jndi/rmi://localhost:5555/jmxrmi - lowercaseOutputName: true - lowercaseOutputLabelNames: true - ssl: false