Commit 1428510a authored by Sören Henning

Merge branch 'master' into quarkus-upgrade

parents 737710f3 851f4bd7
1 merge request: !213 Upgrade Quarkus
Pipeline #6047 passed
@@ -143,6 +143,7 @@ test-benchmarks:
    - build-benchmarks
  script: ./gradlew test --continue
  artifacts:
    when: always
    reports:
      junit:
        - "theodolite-benchmarks/**/build/test-results/test/TEST-*.xml"
@@ -396,6 +397,7 @@ test-theodolite:
    #- build-theodolite-native
  script: ./gradlew test --stacktrace
  artifacts:
    when: always
    reports:
      junit:
        - "theodolite/**/build/test-results/test/TEST-*.xml"
...
../analysis/requirements.txt
\ No newline at end of file
@@ -58,7 +58,29 @@ As a Benchmark may define multiple supported load and resource types, an Executi
## Definition of SLOs

SLOs provide a way to quantify whether a certain load intensity can be handled by a certain amount of provisioned resources.
In Theodolite, SLOs are evaluated by requesting monitoring data from Prometheus and analyzing it in a benchmark-specific way.
An Execution must define at least one SLO to be checked.
A good choice to get started is defining an SLO of type `generic`:
```yaml
- sloType: "generic"
  prometheusUrl: "http://prometheus-operated:9090"
  offset: 0
  properties:
    externalSloUrl: "http://localhost:8082"
    promQLQuery: "sum by(job) (kafka_streams_stream_task_metrics_dropped_records_total>=0)"
    warmup: 60 # in seconds
    queryAggregation: max
    repetitionAggregation: median
    operator: lte
    threshold: 1000
```
All you have to do is define a [PromQL query](https://prometheus.io/docs/prometheus/latest/querying/basics/) describing which metrics should be requested (`promQLQuery`) and how the resulting time series should be evaluated. With `queryAggregation` you specify how the resulting time series is aggregated to a single value, and `repetitionAggregation` describes how the results of multiple repetitions are aggregated. Possible values are `mean`, `median`, `mode`, `sum`, `count`, `max`, `min`, `std`, `var`, `skew`, `kurt` as well as percentiles such as `p99` or `p99.9`. The result of aggregating all repetitions is checked against the `threshold`. This check is performed using an `operator`, which specifies whether the result must be less than (`lt`), less than or equal to (`lte`), greater than (`gt`), or greater than or equal to (`gte`) the threshold.

In case you need to evaluate monitoring data in a more flexible fashion, you can also change the value of `externalSloUrl` to point to your custom SLO checker. Have a look at the source code of the [generic SLO checker](https://github.com/cau-se/theodolite/tree/master/slo-checker/generic) to get started.
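A custom SLO checker essentially needs to accept the same request that Theodolite sends to the generic one: a POST whose JSON body contains the Prometheus query `results` and the SLO `metadata` (its `properties`), answered with a boolean. The following minimal sketch illustrates that contract; the aggregation policy in it is purely illustrative and not how the generic checker works.

```python
# Minimal sketch of a custom SLO checker (illustrative only, not the shipped generic checker).
# Assumes Theodolite POSTs {"results": [...], "metadata": {...}} and expects a JSON boolean.
import json
from fastapi import FastAPI, Request

app = FastAPI()

@app.post("/", response_model=bool)
async def check_slo(request: Request) -> bool:
    data = json.loads(await request.body())
    threshold = float(data["metadata"]["threshold"])
    # Each entry in "results" is one Prometheus range-query result; r[0]["values"]
    # holds the [timestamp, value] pairs of its first time series.
    maxima = [
        max(float(value) for _, value in r[0]["values"])
        for r in data["results"]
        if r and r[0]["values"]
    ]
    # Illustrative policy: the SLO holds if every repetition's maximum stays at or below the threshold.
    return bool(maxima) and all(m <= threshold for m in maxima)
```

Run it with any ASGI server such as `uvicorn` and point `externalSloUrl` to its address.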
## Experimental Setup

@@ -72,7 +94,7 @@ The experimental setup can be configured by:
## Configuration Overrides

In cases where only small modifications of a system under test should be benchmarked, it is not necessary to [create a new benchmark](creating-a-benchmark).
Instead, Executions also allow small reconfigurations, such as switching a specific Pod scheduler on or off.
This is done by defining `configOverrides` in the Execution. Each override consists of a patcher, which defines which Kubernetes resource should be patched and in which way, and a value the patcher is applied with.
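For illustration only, such an override could look like the following sketch; the patcher type, resource file name, and value are assumptions and depend on which patchers the deployed benchmark actually supports.

```yaml
configOverrides:
  # Hypothetical example: assign a non-default scheduler to one Deployment of the SUT.
  # "SchedulerNamePatcher", the resource file name, and the value are placeholders.
  - patcher:
      type: "SchedulerNamePatcher"
      resource: "uc1-kstreams-deployment.yaml"
    value: "random-scheduler"
```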
...
@@ -11,6 +11,7 @@ Running scalability benchmarks with Theodolite involves the following steps:
1. [Deploying a benchmark to Kubernetes](#deploying-a-benchmark)
1. [Creating an execution](#creating-an-execution), which describes the experimental setup for running the benchmark
1. [Accessing benchmark results](#accessing-benchmark-results)
1. [Analyzing benchmark results](#analyzing-benchmark-results) with Theodolite's Jupyter notebooks
## Deploying a Benchmark

@@ -131,3 +132,32 @@ For installations without persistence, but also as an alternative for installati
```sh
kubectl cp $(kubectl get pod -l app=theodolite -o jsonpath="{.items[0].metadata.name}"):/results . -c results-access
```
## Analyzing Benchmark Results
Theodolite comes with Jupyter notebooks for analyzing and visualizing benchmark execution results.
The easiest way to use them is at MyBinder:
[Launch Notebooks](https://mybinder.org/v2/gh/cau-se/theodolite/HEAD?labpath=analysis){: .btn .btn-primary }
{: .text-center }
Alternatively, you can also [run these notebooks locally](https://github.com/cau-se/theodolite/tree/master/analysis), for example, with Docker or Visual Studio Code.

The notebooks allow computing a scalability function using Theodolite's *demand* metric and visualizing multiple such functions in plots:
### Computing the *demand* metric with `demand-metric.ipynb` (optional)
After finishing a benchmark execution, Theodolite creates an `exp<id>_demand.csv` file. It maps each tested load intensity to the minimum resources required for that load. If the monitoring data collected during the benchmark execution should be analyzed in more detail, the `demand-metric.ipynb` notebook can be used.

Theodolite stores monitoring data for each conducted SLO experiment in `exp<id>_<load>_<resources>_<slo-slug>_<rep>.csv` files, where `<id>` is the ID of an execution, `<load>` the corresponding load intensity value, `<resources>` the resources value, `<slo-slug>` the [name of the SLO](creating-an-execution.html#definition-of-slos) and `<rep>` the repetition counter.
The `demand-metric.ipynb` notebook reads these files and generates a new CSV file mapping load intensities to the minimum required resources. The format of this file corresponds to the original `exp<id>_demand.csv` file created when running the benchmark, but allows, for example, evaluating different warm-up periods.

Currently, the `demand-metric.ipynb` notebook only supports benchmarks with the *lag trend SLO* out of the box, but it can easily be adjusted to perform any other type of analysis.
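As a rough sketch of the idea only (not the notebook's actual code), the computation boils down to grouping the repetition files by load and resource amount and picking, per load, the smallest resource amount whose repetitions all satisfy the SLO. The `slo_is_met` function below is a hypothetical placeholder for the benchmark-specific check, e.g., a lag trend analysis.

```python
# Sketch of the demand computation (illustrative only). `slo_is_met` is a hypothetical
# placeholder for the benchmark-specific evaluation of one repetition's monitoring data.
from collections import defaultdict
from pathlib import Path

def slo_is_met(csv_file: Path, warmup_seconds: int) -> bool:
    """Placeholder: evaluate the monitoring data of a single SLO experiment repetition."""
    raise NotImplementedError

def demand(results_dir: str, exp_id: int, warmup_seconds: int = 60) -> dict[int, int]:
    # Group repetition files by (load, resources) according to the
    # exp<id>_<load>_<resources>_<slo-slug>_<rep>.csv naming scheme described above.
    experiments = defaultdict(list)
    for f in Path(results_dir).glob(f"exp{exp_id}_*.csv"):
        parts = f.stem.split("_")
        if len(parts) < 5:
            continue  # skips, e.g., exp<id>_demand.csv
        experiments[(int(parts[1]), int(parts[2]))].append(f)

    # Per load, the demand is the smallest resource amount whose repetitions all satisfy the SLO.
    demand_per_load: dict[int, int] = {}
    for (load, resources), files in sorted(experiments.items()):
        if load not in demand_per_load and all(slo_is_met(f, warmup_seconds) for f in files):
            demand_per_load[load] = resources
    return demand_per_load
```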
### Plotting benchmark results for the *demand* metric with `demand-metric-plot.ipynb`

The `demand-metric-plot.ipynb` notebook takes one or multiple `exp<id>_demand.csv` files as input and visualizes them together in a plot.
Input files can either be taken directly from Theodolite or created with the `demand-metric.ipynb` notebook.

All plotting code is only intended to serve as a template. Adjust it to change colors, labels, formatting, etc. as needed.
Please refer to the official docs of [Matplotlib](https://matplotlib.org/) and the [ggplot](https://matplotlib.org/stable/gallery/style_sheets/ggplot.html) style, which are used to generate the plots.
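A minimal plotting sketch, assuming two hypothetical input files and relying only on the `load` and `resources` columns of the demand CSV files, could look like this:

```python
# Minimal plotting sketch for exp<id>_demand.csv files (columns: load, resources).
# The input file names and labels are placeholders; adjust styling as needed.
import matplotlib.pyplot as plt
import pandas as pd

plt.style.use("ggplot")

for path, label in [("exp1_demand.csv", "baseline"), ("exp2_demand.csv", "tuned")]:
    df = pd.read_csv(path).sort_values("load")
    plt.plot(df["load"], df["resources"], marker="o", label=label)

plt.xlabel("Load intensity")
plt.ylabel("Resource demand")
plt.legend()
plt.savefig("demand-comparison.pdf")
```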
@@ -8,5 +8,8 @@ cp-helm-charts:
      offsets.topic.replication.factor: "1"
operator:
  sloChecker:
    droppedRecordsKStreams:
      enabled: false
  resultsVolume:
    enabled: false
@@ -31,6 +31,19 @@ spec:
          volumeMounts:
            - name: theodolite-results-volume
              mountPath: "/deployments/results"
        {{- if .Values.operator.sloChecker.droppedRecordsKStreams.enabled }}
        - name: slo-checker-generic
          image: "{{ .Values.operator.sloChecker.generic.image }}:{{ .Values.operator.sloChecker.generic.imageTag }}"
          imagePullPolicy: "{{ .Values.operator.sloChecker.generic.imagePullPolicy }}"
          ports:
            - containerPort: 8082
              name: analysis
          env:
            - name: PORT
              value: "8082"
            - name: LOG_LEVEL
              value: INFO
        {{- end }}
        {{- if .Values.operator.sloChecker.lagTrend.enabled }}
        - name: lag-trend-slo-checker
          image: "{{ .Values.operator.sloChecker.lagTrend.image }}:{{ .Values.operator.sloChecker.lagTrend.imageTag }}"
...
@@ -256,6 +256,11 @@ operator:
  nodeSelector: {}
  sloChecker:
    generic:
      enabled: true
      image: ghcr.io/cau-se/theodolite-slo-checker-generic
      imageTag: latest
      imagePullPolicy: Always
    lagTrend:
      enabled: true
      image: ghcr.io/cau-se/theodolite-slo-checker-lag-trend
...
@@ -57,15 +57,14 @@ def check_result(result, operator: str, threshold):
@app.post("/",response_model=bool)
async def check_slo(request: Request):
    data = json.loads(await request.body())
    logger.info('Received request with metadata: %s', data['metadata'])
    warmup = int(data['metadata']['warmup'])
    query_aggregation = get_aggr_func(data['metadata']['queryAggregation'])
    rep_aggregation = get_aggr_func(data['metadata']['repetitionAggregation'])
    operator = data['metadata']['operator']
    threshold = int(data['metadata']['threshold'])
-   for r in data["results"]:
-       aggr_query(r[0]["values"], warmup, query_aggregation)
    query_results = [aggr_query(r[0]["values"], warmup, query_aggregation) for r in data["results"]]
    result = pd.DataFrame(query_results).aggregate(rep_aggregation).at[0]
    return check_result(result, operator, threshold)
...
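For orientation, a request against this checker can also be issued manually. In the hedged example below, the metric labels, timestamps, and values are made up, while the endpoint, port, and metadata fields follow the code and configuration shown above.

```python
# Manually exercising the generic SLO checker (sample values are made up).
# "results" mirrors Prometheus range-query output: a list of query results,
# each a list of series whose "values" are [timestamp, value] pairs.
import requests

payload = {
    "metadata": {
        "warmup": 60,
        "queryAggregation": "max",
        "repetitionAggregation": "median",
        "operator": "lte",
        "threshold": 1000,
    },
    "results": [
        [{"metric": {"job": "example-job"}, "values": [[1634624674, "0"], [1634624734, "0"]]}],
    ],
}

print(requests.post("http://localhost:8082/", json=payload).json())  # True if the SLO is met
```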
@@ -50,12 +50,11 @@ class SloCheckerFactory {
        // TODO validate property contents
        metadata = mapOf(
            "warmup" to (properties["warmup"]?.toInt() ?: throw IllegalArgumentException("warmup expected")),
-           "queryAggregation" to (properties["warmup"]?.toInt()
            "queryAggregation" to (properties["queryAggregation"]
                ?: throw IllegalArgumentException("queryAggregation expected")),
-           "repetitionAggregation" to (properties["warmup"]?.toInt()
            "repetitionAggregation" to (properties["repetitionAggregation"]
                ?: throw IllegalArgumentException("repetitionAggregation expected")),
-           "operator" to (properties["warmup"]?.toInt()
-               ?: throw IllegalArgumentException("operator expected")),
            "operator" to (properties["operator"] ?: throw IllegalArgumentException("operator expected")),
            "threshold" to (properties["threshold"]?.toInt()
                ?: throw IllegalArgumentException("threshold expected"))
        )
...
@@ -137,6 +137,12 @@ class TheodoliteExecutor(
                config.compositeStrategy.benchmarkExecutor.results,
                "${resultsFolder}exp${this.config.executionId}-result"
            )
            // Create expXYZ_demand.csv file
            ioHandler.writeToCSVFile(
                "${resultsFolder}exp${this.config.executionId}_demand",
                calculateDemandMetric(config.loads, config.compositeStrategy.benchmarkExecutor.results),
                listOf("load","resources")
            )
        }
        kubernetesBenchmark.teardownInfrastructure()
    }
@@ -151,4 +157,8 @@ class TheodoliteExecutor(
        return executionID
    }

    private fun calculateDemandMetric(loadDimensions: List<LoadDimension>, results: Results): List<List<String>> {
        return loadDimensions.map { listOf(it.get().toString(), results.getMinRequiredInstances(it).get().toString()) }
    }
}
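Assuming the column names above are written as a header row, the resulting demand file is a plain two-column CSV; the rows below are illustrative values only:

```
load,resources
50000,1
100000,2
150000,4
```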