Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • she/theodolite
1 result
Show changes
Commits on Source (37)
Showing
with 282 additions and 104 deletions
......@@ -18,4 +18,15 @@ benchmarks:
- benchmarks/*
- when: manual
allow_failure: true
\ No newline at end of file
execution:
stage: triggers
trigger:
include: execution/.gitlab-ci.yml
strategy: depend
rules:
- if: "$CI_COMMIT_TAG"
- changes:
- execution/*
- when: manual
allow_failure: true
.dockerignore
Dockerfile
\ No newline at end of file
FROM jupyter/base-notebook
COPY . /home/jovyan
WORKDIR /home/jovyan
RUN rm -r work
RUN pip install -r requirements.txt
......@@ -9,14 +9,32 @@ benchmark execution results and plotting. The following notebooks are provided:
## Usage
For executing benchmarks and analyzing their results, a **Python 3.7**
installation is required (e.g., in a virtual environment). Our notebooks require some
Python libraries, which can be installed via:
Basically, the Theodolite Analysis Jupyter notebooks should be runnable by any Jupyter server. To make it a bit easier,
we provide introductions for running notebooks with Docker and with Visual Studio Code. These introductions may also be
a good starting point for using another service.
For analyzing and visualizing benchmark results, either Docker or a Jupyter installation with Python 3.7 or newer is
required (e.g., in a virtual environment).
### Running with Docker
This option requires Docker to be installed. You can build and run a container using the following commands. Make sure
to set the `results` volume to the directory with your execution results and `results-inst` to a directory where the
final scalability graphs should be placed. The output of the *run* command gives you a URL of the form
`http://127.0.0.1:8888/?token=...`, which you should open in your web browser. From there you can access all notebooks.
You can stop the Jupyter server with Ctrl + C.
```sh
pip install -r requirements.txt
docker build . -t theodolite-analysis
docker run --rm -p 8888:8888 -v "$PWD/../results":/home/jovyan/results -v "$PWD/../results-inst":/home/jovyan/results-inst theodolite-analysis
```
We have tested these
notebooks with [Visual Studio Code](https://code.visualstudio.com/docs/python/jupyter-support),
however, every other server should be fine as well.
### Running with Visual Studio Code
The [Visual Studio Code Documentation](https://code.visualstudio.com/docs/python/jupyter-support) shows how to run Jupyter
notebooks with Visual Studio Code. For our notebooks, Python 3.7 or newer is required (e.g., in a virtual environment).
Moreover, they require some Python libraries, which can be installed by:
```sh
pip install -r requirements.txt
```
\ No newline at end of file
......@@ -110,14 +110,15 @@ public abstract class KafkaStreamsBuilder {
*
* @return A {@code Topology} for a {@code KafkaStreams} application.
*/
protected abstract Topology buildTopology();
protected abstract Topology buildTopology(Properties properties);
/**
* Builds the {@link KafkaStreams} instance.
*/
public KafkaStreams build() {
// Create the Kafka streams instance.
return new KafkaStreams(this.buildTopology(), this.buildProperties());
final Properties properties = this.buildProperties();
return new KafkaStreams(this.buildTopology(properties), properties);
}
}
package theodolite.uc1.streamprocessing;
import com.google.gson.Gson;
import java.util.Properties;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.Topology;
......@@ -36,7 +37,7 @@ public class TopologyBuilder {
/**
* Build the {@link Topology} for the History microservice.
*/
public Topology build() {
public Topology build(final Properties properties) {
this.builder
.stream(this.inputTopic, Consumed.with(
Serdes.String(),
......@@ -44,6 +45,6 @@ public class TopologyBuilder {
.mapValues(v -> this.gson.toJson(v))
.foreach((k, v) -> LOGGER.info("Key: " + k + " Value: " + v));
return this.builder.build();
return this.builder.build(properties);
}
}
package theodolite.uc1.streamprocessing;
import java.util.Objects;
import java.util.Properties;
import org.apache.commons.configuration2.Configuration;
import org.apache.kafka.streams.Topology;
import theodolite.commons.kafkastreams.KafkaStreamsBuilder;
......@@ -16,9 +17,9 @@ public class Uc1KafkaStreamsBuilder extends KafkaStreamsBuilder {
}
@Override
protected Topology buildTopology() {
protected Topology buildTopology(final Properties properties) {
Objects.requireNonNull(this.inputTopic, "Input topic has not been set.");
return new TopologyBuilder(this.inputTopic,
new SchemaRegistryAvroSerdeFactory(this.schemaRegistryUrl)).build();
new SchemaRegistryAvroSerdeFactory(this.schemaRegistryUrl)).build(properties);
}
}
package theodolite.uc2.streamprocessing;
import java.time.Duration;
import java.util.Properties;
import java.util.Set;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KeyValue;
......@@ -59,9 +60,9 @@ public class TopologyBuilder {
final Duration emitPeriod, final Duration gracePeriod,
final SchemaRegistryAvroSerdeFactory srAvroSerdeFactory) {
this.inputTopic = inputTopic;
this.outputTopic = outputTopic;
this.feedbackTopic = feedbackTopic;
this.configurationTopic = configurationTopic;
this.outputTopic = outputTopic;
this.emitPeriod = emitPeriod;
this.gracePeriod = gracePeriod;
......@@ -71,7 +72,7 @@ public class TopologyBuilder {
/**
* Build the {@link Topology} for the Aggregation microservice.
*/
public Topology build() {
public Topology build(final Properties properties) {
// 1. Build Parent-Sensor Table
final KTable<String, Set<String>> parentSensorTable = this.buildParentSensorTable();
......@@ -92,7 +93,7 @@ public class TopologyBuilder {
// 5. Expose Aggregations Stream
this.exposeOutputStream(aggregations);
return this.builder.build();
return this.builder.build(properties);
}
private KTable<String, ActivePowerRecord> buildInputTable() {
......
......@@ -2,6 +2,7 @@ package theodolite.uc2.streamprocessing;
import java.time.Duration;
import java.util.Objects;
import java.util.Properties;
import org.apache.commons.configuration2.Configuration;
import org.apache.kafka.streams.Topology;
import theodolite.commons.kafkastreams.KafkaStreamsBuilder;
......@@ -51,7 +52,7 @@ public class Uc2KafkaStreamsBuilder extends KafkaStreamsBuilder { // NOPMD build
}
@Override
protected Topology buildTopology() {
protected Topology buildTopology(final Properties properties) {
Objects.requireNonNull(this.inputTopic, "Input topic has not been set.");
Objects.requireNonNull(this.feedbackTopic, "Feedback topic has not been set.");
Objects.requireNonNull(this.outputTopic, "Output topic has not been set.");
......@@ -59,14 +60,14 @@ public class Uc2KafkaStreamsBuilder extends KafkaStreamsBuilder { // NOPMD build
final TopologyBuilder topologyBuilder = new TopologyBuilder(
this.inputTopic,
this.feedbackTopic,
this.outputTopic,
this.feedbackTopic,
this.configurationTopic,
this.emitPeriod == null ? EMIT_PERIOD_DEFAULT : this.emitPeriod,
this.gracePeriod == null ? GRACE_PERIOD_DEFAULT : this.gracePeriod,
new SchemaRegistryAvroSerdeFactory(this.schemaRegistryUrl));
return topologyBuilder.build();
return topologyBuilder.build(properties);
}
}
......@@ -2,6 +2,7 @@ package theodolite.uc3.streamprocessing;
import com.google.common.math.Stats;
import java.time.Duration;
import java.util.Properties;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
......@@ -46,7 +47,7 @@ public class TopologyBuilder {
/**
* Build the {@link Topology} for the History microservice.
*/
public Topology build() {
public Topology build(final Properties properties) {
this.builder
.stream(this.inputTopic,
Consumed.with(Serdes.String(),
......@@ -68,6 +69,6 @@ public class TopologyBuilder {
.peek((k, v) -> LOGGER.info(k + ": " + v))
.to(this.outputTopic, Produced.with(Serdes.String(), Serdes.String()));
return this.builder.build();
return this.builder.build(properties);
}
}
......@@ -2,6 +2,7 @@ package theodolite.uc3.streamprocessing;
import java.time.Duration;
import java.util.Objects;
import java.util.Properties;
import org.apache.commons.configuration2.Configuration;
import org.apache.kafka.streams.Topology;
import theodolite.commons.kafkastreams.KafkaStreamsBuilder;
......@@ -30,14 +31,14 @@ public class Uc3KafkaStreamsBuilder extends KafkaStreamsBuilder {
}
@Override
protected Topology buildTopology() {
protected Topology buildTopology(final Properties properties) {
Objects.requireNonNull(this.inputTopic, "Input topic has not been set.");
Objects.requireNonNull(this.outputTopic, "Output topic has not been set.");
Objects.requireNonNull(this.windowDuration, "Window duration has not been set.");
final TopologyBuilder topologyBuilder = new TopologyBuilder(this.inputTopic, this.outputTopic,
new SchemaRegistryAvroSerdeFactory(this.schemaRegistryUrl), this.windowDuration);
return topologyBuilder.build();
return topologyBuilder.build(properties);
}
}
......@@ -5,6 +5,7 @@ import java.time.Duration;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.Properties;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KeyValue;
......@@ -54,7 +55,7 @@ public class TopologyBuilder {
/**
* Build the {@link Topology} for the History microservice.
*/
public Topology build() {
public Topology build(final Properties properties) {
final StatsKeyFactory<HourOfDayKey> keyFactory = new HourOfDayKeyFactory();
final Serde<HourOfDayKey> keySerde = HourOfDayKeySerde.create();
......@@ -89,6 +90,6 @@ public class TopologyBuilder {
Serdes.String()));
// this.serdes.avroValues()));
return this.builder.build();
return this.builder.build(properties);
}
}
......@@ -2,6 +2,7 @@ package theodolite.uc4.streamprocessing;
import java.time.Duration;
import java.util.Objects;
import java.util.Properties;
import org.apache.commons.configuration2.Configuration;
import org.apache.kafka.streams.Topology;
import theodolite.commons.kafkastreams.KafkaStreamsBuilder;
......@@ -36,7 +37,7 @@ public class Uc4KafkaStreamsBuilder extends KafkaStreamsBuilder {
}
@Override
protected Topology buildTopology() {
protected Topology buildTopology(final Properties properties) {
Objects.requireNonNull(this.inputTopic, "Input topic has not been set.");
Objects.requireNonNull(this.outputTopic, "Output topic has not been set.");
Objects.requireNonNull(this.aggregtionDuration, "Aggregation duration has not been set.");
......@@ -49,7 +50,7 @@ public class Uc4KafkaStreamsBuilder extends KafkaStreamsBuilder {
this.aggregtionDuration,
this.aggregationAdvance);
return topologyBuilder.build();
return topologyBuilder.build(properties);
}
}
stages:
- deploy
deploy:
stage: deploy
tags:
- exec-dind
image: docker:19.03.1
services:
- docker:19.03.1-dind
variables:
DOCKER_TLS_CERTDIR: "/certs"
script:
- DOCKER_TAG_NAME=$(echo $CI_COMMIT_REF_SLUG- | sed 's/^master-$//')
- docker build --pull -t theodolite ./execution
- "[ ! $CI_COMMIT_TAG ] && docker tag theodolite $DOCKERHUB_ORG/theodolite:${DOCKER_TAG_NAME}latest"
- "[ ! $CI_COMMIT_TAG ] && docker tag theodolite $DOCKERHUB_ORG/theodolite:$DOCKER_TAG_NAME$CI_COMMIT_SHORT_SHA"
- "[ $CI_COMMIT_TAG ] && docker tag theodolite $DOCKERHUB_ORG/theodolite:$CI_COMMIT_TAG"
- echo $DOCKERHUB_PW | docker login -u $DOCKERHUB_ID --password-stdin
- docker push $DOCKERHUB_ORG/theodolite
- docker logout
rules:
- if: "$DOCKERHUB_ORG && $DOCKERHUB_ID && $DOCKERHUB_PW && $CI_COMMIT_TAG"
when: always
- changes:
- execution/**/*
if: "$DOCKERHUB_ORG && $DOCKERHUB_ID && $DOCKERHUB_PW"
when: always
- if: "$DOCKERHUB_ORG && $DOCKERHUB_ID && $DOCKERHUB_PW"
when: manual
allow_failure: true
deploy-ghcr:
stage: deploy
tags:
- exec-dind
image: docker:19.03.1
services:
- docker:19.03.1-dind
variables:
DOCKER_TLS_CERTDIR: "/certs"
script:
- DOCKER_TAG_NAME=$(echo $CI_COMMIT_REF_SLUG- | sed 's/^master-$//')
- docker build --pull -t theodolite ./execution
- "[ ! $CI_COMMIT_TAG ] && docker tag theodolite ghcr.io/$GITHUB_CR_ORG/theodolite:${DOCKER_TAG_NAME}latest"
- "[ ! $CI_COMMIT_TAG ] && docker tag theodolite ghcr.io/$GITHUB_CR_ORG/theodolite:$DOCKER_TAG_NAME$CI_COMMIT_SHORT_SHA"
- "[ $CI_COMMIT_TAG ] && docker tag theodolite ghcr.io/$GITHUB_CR_ORG/theodolite:$CI_COMMIT_TAG"
- echo $GITHUB_CR_TOKEN | docker login ghcr.io -u $GITHUB_CR_USER --password-stdin
- docker push ghcr.io/$GITHUB_CR_ORG/theodolite
- docker logout
rules:
- if: "$GITHUB_CR_ORG && $GITHUB_CR_USER && $GITHUB_CR_TOKEN && $CI_COMMIT_TAG"
when: always
- changes:
- execution/**/*
if: "$GITHUB_CR_ORG && $GITHUB_CR_USER && $GITHUB_CR_TOKEN"
when: always
- if: "$GITHUB_CR_ORG && $GITHUB_CR_USER && $GITHUB_CR_TOKEN"
when: manual
allow_failure: true
\ No newline at end of file
......@@ -8,45 +8,28 @@ benchmarks](#execution).
## Installation
### Kubernetes Cluster
For executing benchmarks, access to a Kubernetes cluster is required. If you already run other applications inside your
cluster, you might want to consider creating a dedicated namespace for your benchmarks.
For executing benchmarks, access to a Kubernetes cluster is required. We suggest
to create a dedicated namespace for executing your benchmarks. The following
services need to be available as well.
### Installing Dependencies
### Kubernetes Volume
For executing the benchmark as a Kubernetes job it is required to use a volume to store the results of the executions.
In `infrastructure/kubernetes` are two files for creating a volume.
Either one of them should be used.
The `volumeSingle.yaml` is meant for systems where Kubernetes is run locally (e.g. minikube, kind etc.).
However, you can also use the other file.
In `volumeSingle.yaml` you need to set `path` to the path on your machine where the results should be stored.
The `volumeCluster.yaml` should be used when Kubernetes runs in the cloud.
In the `nodeAffinity` section you need to exchange `<node-name>` to the name of the node where the volume should be created (this node will most likely execute also the job).
However, you can also set a different `nodeAffinity`.
Further you need to set `path` to the path on the node where the results should be stored.
After setting the properties you can create the volume with:
```sh
kubectl apply -f infrastructure/kubernetes/volume(Single|Cluster).yaml
```
The following third-party services need to be installed in your cluster. For most of them, the suggested way to install
them is via [Helm](https://helm.sh).
#### Prometheus
We suggest to use the [Prometheus Operator](https://github.com/coreos/prometheus-operator)
and create a dedicated Prometheus instance for these benchmarks.
If Prometheus Operator is not already available on your cluster, a convenient
way to install is via the [**unofficial** Prometheus Operator Helm chart](https://github.com/helm/charts/tree/master/stable/prometheus-operator).
As you may not need an entire cluster monitoring stack, you can use our Helm
configuration to only install the operator:
If Prometheus Operator is not already available on your cluster, a convenient way to install it is via the
[Prometheus community Helm chart](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack).
As you may not need an entire cluster monitoring stack, you can use our Helm configuration to only install the
operator:
```sh
helm install prometheus-operator stable/prometheus-operator -f infrastructure/prometheus/helm-values.yaml
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
helm install prometheus-operator prometheus-community/kube-prometheus-stack -f infrastructure/prometheus/helm-values.yaml
```
After installation, you need to create a Prometheus instance:
......@@ -55,9 +38,17 @@ After installation, you need to create a Prometheus instance:
kubectl apply -f infrastructure/prometheus/prometheus.yaml
```
You might also need to apply the [ServiceAccount](infrastructure/prometheus/service-account.yaml), [ClusterRole](infrastructure/prometheus/cluster-role.yaml)
and the [ClusterRoleBinding](infrastructure/prometheus/cluster-role-binding.yaml),
depending on your cluster's security policies.
You might also need to apply the [ClusterRole](infrastructure/prometheus/cluster-role.yaml), the
[ClusterRoleBinding](infrastructure/prometheus/cluster-role-binding.yaml) and the
[ServiceAccount](infrastructure/prometheus/service-account.yaml), depending on your cluster's security
policies. If you are not in the *default* namespace, alter the namespace in
[Prometheus' ClusterRoleBinding](infrastructure/prometheus/cluster-role-binding.yaml) accordingly.
```sh
kubectl apply -f infrastructure/prometheus/cluster-role.yaml
kubectl apply -f infrastructure/prometheus/cluster-role-binding.yaml
kubectl apply -f infrastructure/prometheus/service-account.yaml
```
For the individual benchmarking components to be monitored, [ServiceMonitors](https://github.com/coreos/prometheus-operator#customresourcedefinitions)
are used. See the corresponding sections below for how to install them.
......@@ -68,14 +59,16 @@ As with Prometheus, we suggest to create a dedicated Grafana instance. Grafana
with our default configuration can be installed with Helm:
```sh
helm install grafana stable/grafana -f infrastructure/grafana/values.yaml
helm repo add grafana https://grafana.github.io/helm-charts
helm repo update
helm install grafana grafana/grafana -f infrastructure/grafana/values.yaml
```
The official [Grafana Helm Chart repository](https://github.com/helm/charts/tree/master/stable/grafana)
provides further documentation including a table of configuration options.
We provide ConfigMaps for a [Grafana dashboard](infrastructure/grafana/dashboard-config-map.yaml) and a [Grafana data source](infrastructure/grafana/prometheus-datasource-config-map.yaml).
Create them as follows:
We provide ConfigMaps for a [Grafana dashboard](infrastructure/grafana/dashboard-config-map.yaml) and a
[Grafana data source](infrastructure/grafana/prometheus-datasource-config-map.yaml). Create them as follows:
```sh
kubectl apply -f infrastructure/grafana/dashboard-config-map.yaml
......@@ -102,6 +95,9 @@ kubectl apply -f infrastructure/kafka/service-monitor.yaml
Other Kafka deployments, for example, using Strimzi, should work in a similar way.
*Please note that currently, even if installed differently, the corresponding services must run at
*my-confluent-cp-kafka:9092*, *my-confluent-cp-zookeeper:2181* and *my-confluent-cp-schema-registry:8081*.
#### A Kafka Client Pod
A permanently running pod used for Kafka configuration is started via:
......@@ -128,71 +124,91 @@ To install it:
helm install kafka-lag-exporter https://github.com/lightbend/kafka-lag-exporter/releases/download/v0.6.3/kafka-lag-exporter-0.6.3.tgz -f infrastructure/kafka-lag-exporter/values.yaml
```
### Installing Theodolite
### Python 3.7 (Only required for local Execution Control)
While Theodolite itself does not have to be installed as it is loaded at runtime (see [execution](#Execution)), it requires some
resources to be deployed in your cluster. These resources are grouped under RBAC and Volume in the following paragraphs.
For executing benchmarks, a **Python 3.7** installation is required. We suggest
to use a virtual environment placed in the `.venv` directory (in the Theodolite
root directory). A set of requirements is needed. You can install them with the following
command (make sure to be in your virtual environment if you use one):
#### Theodolite RBAC
**The following step is only required if RBAC is enabled in your cluster.** If you are not sure whether this is the
case, you may simply try it without the following step.
If RBAC is enabled in your cluster, you have to allow Theodolite to start and stop pods etc. To do so, deploy the RBAC
resources via:
```sh
pip install -r requirements.txt
kubectl apply -f infrastructure/kubernetes/rbac/role.yaml
kubectl apply -f infrastructure/kubernetes/rbac/role-binding.yaml
kubectl apply -f infrastructure/kubernetes/rbac/service-account.yaml
```
#### Theodolite Volume
### Required Manual Adjustments
In order to persistently store benchmark results, Theodolite needs a volume mounted. We provide pre-configured
declarations for different volume types.
Depending on your setup, some additional adjustments may be necessary:
##### *hostPath* volume
* Change Kafka and Zookeeper servers in the Kubernetes deployments (uc1-application etc.) and `run_XX.sh` scripts
* Change the name of your Kubernetes namespace for [Prometheus' ClusterRoleBinding](infrastructure/prometheus/cluster-role-binding.yaml)
* *Please let us know if there are further adjustments necessary*
Using a [hostPath volume](https://kubernetes.io/docs/concepts/storage/volumes/#hostpath) is the easiest option when
running Theodolite locally, e.g., with minikube or kind.
Just modify `infrastructure/kubernetes/volumeSingle.yaml` by setting `path` to the directory on your host machine where
all benchmark results should be stored and run:
```sh
kubectl apply -f infrastructure/kubernetes/volumeSingle.yaml
```
## Execution
##### *local* volume
You can either execute the Execution Control on your machine or also deploy the Execution control in Kubernetes.
A [local volume](https://kubernetes.io/docs/concepts/storage/volumes/#local) is a simple option to use when having
access (e.g. via SSH) to one of your cluster nodes.
### Local Execution
You first need to create a directory on a selected node where all benchmark results should be stored. Next, modify
`infrastructure/kubernetes/volumeCluster.yaml` by setting `<node-name>` to your selected node (this node will most
likely also execute the job). Further, you have to set `path` to the directory on the node you just created. To deploy
your volume run:
Please note that a **Python 3.7** installation is required for executing Theodolite.
```sh
kubectl apply -f infrastructure/kubernetes/volumeCluster.yaml
```
The `theodolite.py` is the entrypoint for all benchmark executions. It has to be called as follows:
##### Other volumes
```python
python theodolite.py --uc <uc> --loads <load> [<load> ...] --instances <instances> [<instances> ...]
```
To use volumes provided by public cloud providers or network-based file systems, you can use the definitions in
`infrastructure/kubernetes/` as a starting point. See the official
[volumes documentation](https://kubernetes.io/docs/concepts/storage/volumes/) for additional information.
The command above is the minimal command for execution.
Further configuration options are described [below](#configuration) or available via `python theodolite.py -h`.
### Kubernetes Execution
## Execution
The Execution Control will be run by a Kubernetes Job.
This Job creates a pod that will execute the Execution Control.
To configure the parameters, the `theodolite.yaml` needs to be changed.
For the options take a look at [configuration](#configuration).
The preferred way to run scalability benchmarks with Theodolite is to deploy Theodolite
[Kubernetes Jobs](https://kubernetes.io/docs/concepts/workloads/controllers/job/) in your cluster. For running
Theodolite locally on your machine see the description below.
To start the benchmark, the following command needs to be executed:
```sh
kubectl apply -f theodolite.yaml
```
`theodolite.yaml` provides a template for your own Theodolite job. To run your own job, create a copy, give it a name
(`metadata.name`) and adjust configuration parameters as desired. For a description of available configuration options
see the [Configuration](#configuration) section below. Note that you might have to uncomment the `serviceAccountName` line if
RBAC is enabled on your cluster (see installation of [Theodolite RBAC](#Theodolite-RBAC)).
With `kubectl logs -f theodolite-<*>` you can show the log of the execution control.
To start the execution of a benchmark run (with `<your-theodolite-yaml>` being your job definition):
When the job is finished, your results should be in your mounted [Kubernetes volume](#kubernetes-volume).
In order to start a new benchmark, the old job needs to be deleted.
This can be done with:
```sh
kubectl delete -f theodolite.yaml
kubectl apply -f <your-theodolite-yaml>
```
This will create a pod with a name such as `your-job-name-xxxxxx`. You can verify this via `kubectl get pods`. With
`kubectl logs -f <your-job-name-xxxxxx>`, you can follow the benchmark execution logs.
Once your job is completed (you can verify via `kubectl get jobs`), its results are stored inside your configured
Kubernetes volume.
**Make sure to always run only one Theodolite job at a time.**
### Configuration
| Python | Kubernetes | Description |
| Command line | Kubernetes | Description |
| -------------------- | ------------------- | ------------------------------------------------------------ |
| --uc | UC | **[Mandatory]** Stream processing use case to be benchmarked. Has to be one of `1`, `2`, `3` or `4`. |
| --loads | LOADS | **[Mandatory]** Values for the workload generator to be tested, should be sorted in ascending order. |
......@@ -208,6 +224,7 @@ kubectl delete -f theodolite.yaml
| --prometheus | PROMETHEUS_BASE_URL | Defines where to find the prometheus instance. *Default:* `http://localhost:9090` |
| --path | RESULT_PATH | A directory path for the results. Relative to the Execution folder. *Default:* `results` |
| --configurations | CONFIGURATIONS | Defines environment variables for the use cases and, thus, enables further configuration options. |
| --threshold | THRESHOLD | The threshold for the trend slope that the search strategies use to determine that a load could be handled. *Default:* `2000` |
### Domain Restriction
......@@ -219,8 +236,45 @@ For dimension value, we have a domain of the amounts of instances. As a conseque
* If the dimension value is not the smallest dimension value and N is the minimal amount of instances that was suitable for the last smaller dimension value, the domain for this dimension value contains all amounts of instances greater than or equal to N.
### Benchmarking Search Strategies
There are the following benchmarking strategies:
* `check-all`: For each dimension value, execute one lag experiment for all amounts of instances within the current domain.
* `linear-search`: A heuristic which works as follows: For each dimension value, execute one lag experiment for all number of instances within the current domain. The execution order is from the lowest number of instances to the highest amount of instances and the execution for each dimension value is stopped, when a suitable amount of instances is found or if all lag experiments for the dimension value were not successful.
* `binary-search`: A heuristic which works as follows: For each dimension value, execute one lag experiment for all number of instances within the current domain. The execution order is in a binary-search-like manner. The execution is stopped, when a suitable amount of instances is found or if all lag experiments for the dimension value were not successful.
## Observation
The installed Grafana instance provides a dashboard to observe the benchmark execution. Unless configured otherwise,
this dashboard can be accessed via `http://<cluster-ip>:31199` or via `http://localhost:8080` if proxied with
`kubectl port-forward svc/grafana 8080:service`. Default credentials are user *admin* with password *admin*.
## Local Execution (e.g. for Development)
As an alternative to executing Theodolite as a Kubernetes Job, it is also possible to run it from your local system,
for example, for development purposes. In addition to the general installation instructions, the following adjustments
are necessary.
### Installation
For local execution a **Python 3.7** installation is required. We suggest to use a virtual environment placed in the `.venv`
directory (in the Theodolite root directory). A set of requirements is needed. You can install them with the following
command (make sure to be in your virtual environment if you use one):
```sh
pip install -r requirements.txt
```
Kubernetes volumes and service accounts, roles, and role bindings for Theodolite are not required in this case.
### Local Execution
The `theodolite.py` is the entrypoint for all benchmark executions. It has to be called as follows:
```python
python theodolite.py --uc <uc> --loads <load> [<load> ...] --instances <instances> [<instances> ...]
```
This command is the minimal command for execution. Further configuration options are described [above](#configuration)
or available via `python theodolite.py -h`.
\ No newline at end of file
......@@ -11,7 +11,9 @@ adminPassword: admin
## Sidecars that collect the configmaps with the specified label and store the included files in the respective folders
## Requires at least Grafana 5 to work and can't be used together with parameters dashboardProviders, datasources and dashboards
sidecar:
image: kiwigrid/k8s-sidecar:0.1.99
image:
repository: "kiwigrid/k8s-sidecar"
tag: "1.1.0"
imagePullPolicy: IfNotPresent
dashboards:
enabled: true
......
......@@ -136,6 +136,11 @@ def benchmark_parser(description):
metavar='<strategy>',
default=os.environ.get('SEARCH_STRATEGY', 'default'),
help='The benchmarking search strategy. Can be set to default, linear-search or binary-search')
parser.add_argument('--threshold',
type=int,
metavar='<threshold>',
default=os.environ.get('THRESHOLD', 2000),
help='The threshold for the trend slop that the search strategies use to determine that a load could be handled')
return parser
......
......@@ -2,7 +2,7 @@ from sklearn.linear_model import LinearRegression
import pandas as pd
import os
def compute(directory, filename, warmup_sec, threshold):
def compute(directory, filename, warmup_sec):
df = pd.read_csv(os.path.join(directory, filename))
input = df
input['sec_start'] = input.loc[0:, 'timestamp'] - input.iloc[0]['timestamp']
......@@ -16,4 +16,4 @@ def compute(directory, filename, warmup_sec, threshold):
trend_slope = linear_regressor.coef_[0][0]
return trend_slope
\ No newline at end of file
return trend_slope
......@@ -282,8 +282,16 @@ def run_evaluation(exp_id, uc_id, dim_value, instances, execution_minutes, prome
:param int execution_minutes: How long the use case where executed.
"""
print('Run evaluation function')
lag_analysis.main(exp_id, f'uc{uc_id}', dim_value, instances,
execution_minutes, prometheus_base_url, result_path)
try:
lag_analysis.main(exp_id, f'uc{uc_id}', dim_value, instances,
execution_minutes, prometheus_base_url,
result_path)
except Exception as e:
err_msg = 'Evaluation function failed'
print(err_msg)
logging.exception(err_msg)
print('Benchmark execution continues')
return
......
......@@ -18,5 +18,6 @@ class ExperimentConfig:
configurations: dict
domain_restriction_strategy: object
search_strategy: object
threshold: int
subexperiment_executor: object
subexperiment_evaluator: object