Commit ddbe80a7 authored by Lorenz Boguhn

Merge branch 'master' into feature/126-kafka-communication

parents c679e140 d0299a1d
%% Cell type:code id: tags:
```
print("hello")
```
%% Cell type:code id: tags:
```
import os
from datetime import datetime, timedelta, timezone
import pandas as pd
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
```
%% Cell type:code id: tags:
```
os.getcwd()
```
%% Cell type:code id: tags:
```
exp_id = 2012
warmup_sec = 60
warmup_partitions_sec = 120
threshold = 2000  # maximum accepted slope of the lag trend line
#directory = '../results'
directory = '<path-to>/results'
directory_out = '<path-to>/results-inst'
```
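%% Cell type:markdown id: tags:

The following cells parse the run parameters out of the result filenames. They assume a naming convention of the form `exp<exp_id>_<...>_<dim_value>_<instances>_<suffix>.csv`, where the third and fourth underscore-separated fields hold the load dimension and the number of instances. A minimal sketch of this parsing, with a made-up filename for illustration:

%% Cell type:code id: tags:
```
# Hypothetical example filename, following the convention assumed above.
example = "exp2012_uc1_50000_8_totallag.csv"
parts = example[:-4].split("_")  # strip ".csv", then split on "_"
print(parts[2], parts[3])  # -> "50000" (dim_value) and "8" (instances)
```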
%% Cell type:code id: tags:outputPrepend
```
raw_runs = []
filenames = [filename for filename in os.listdir(directory) if filename.startswith(f"exp{exp_id}") and filename.endswith("totallag.csv")]
for filename in filenames:
    run_params = filename[:-4].split("_")
    dim_value = run_params[2]
    instances = run_params[3]
    df = pd.read_csv(os.path.join(directory, filename))
    #input = df.loc[df['topic'] == "input"]
    input = df
    # Compute the seconds elapsed since the start of the run.
    input['sec_start'] = input.loc[0:, 'timestamp'] - input.iloc[0]['timestamp']
    # Discard the warm-up phase before fitting the regression.
    regress = input.loc[input['sec_start'] >= warmup_sec]
    X = regress.iloc[:, 2].values.reshape(-1, 1)  # elapsed seconds as a column vector (-1 infers the number of rows)
    Y = regress.iloc[:, 3].values.reshape(-1, 1)  # total lag values
    linear_regressor = LinearRegression()
    linear_regressor.fit(X, Y)
    trend_slope = linear_regressor.coef_[0][0]  # slope of the lag trend line
    row = {'dim_value': int(dim_value), 'instances': int(instances), 'trend_slope': trend_slope}
    raw_runs.append(row)

lags = pd.DataFrame(raw_runs)
```
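%% Cell type:markdown id: tags:

For intuition: the slope of the regression line over the total consumer lag indicates whether a deployment keeps up with the load. A roughly constant lag (slope near zero) means the instances are sufficient, while a steadily growing lag means they are not. A self-contained sketch on made-up data:

%% Cell type:code id: tags:
```
import numpy as np
from sklearn.linear_model import LinearRegression

# Made-up measurements: the lag grows steadily, so this run would not keep up.
X = np.array([60, 120, 180, 240, 300]).reshape(-1, 1)   # seconds since start
Y = np.array([900, 2100, 2950, 4200, 5050]).reshape(-1, 1)  # total lag
slope = LinearRegression().fit(X, Y).coef_[0][0]
print(slope)  # ~17.3 lag/second, to be compared against `threshold`
```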
%% Cell type:code id: tags:
```
lags.head()
```
%% Cell type:code id: tags:
```
raw_partitions = []
filenames = [filename for filename in os.listdir(directory) if filename.startswith(f"exp{exp_id}") and filename.endswith("partitions.csv")]
for filename in filenames:
    run_params = filename[:-4].split("_")
    dim_value = run_params[2]
    instances = run_params[3]
    df = pd.read_csv(os.path.join(directory, filename))
    input = df
    input['sec_start'] = input.loc[0:, 'timestamp'] - input.iloc[0]['timestamp']
    # Discard the partition-specific warm-up phase.
    input = input.loc[input['sec_start'] >= warmup_partitions_sec]
    # Keep only the "input" topic.
    input = input.loc[input['topic'] == 'input']
    mean = input['value'].mean()  # mean number of partitions over the run
    row = {'dim_value': int(dim_value), 'instances': int(instances), 'partitions': mean}
    raw_partitions.append(row)

partitions = pd.DataFrame(raw_partitions)
#runs = lags.join(partitions.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])
```
%% Cell type:code id: tags:
```
raw_obs_instances = []
filenames = [filename for filename in os.listdir(directory) if filename.startswith(f"exp{exp_id}") and filename.endswith("instances.csv")]
for filename in filenames:
    run_params = filename[:-4].split("_")
    dim_value = run_params[2]
    instances = run_params[3]
    df = pd.read_csv(os.path.join(directory, filename))
    if df.empty:
        continue
    input = df
    input['sec_start'] = input.loc[0:, 'timestamp'] - input.iloc[0]['timestamp']
    # Discard the warm-up phase.
    input = input.loc[input['sec_start'] >= warmup_sec]
    mean = input['value'].mean()  # mean number of observed instances
    row = {'dim_value': int(dim_value), 'instances': int(instances), 'obs_instances': mean}
    raw_obs_instances.append(row)

obs_instances = pd.DataFrame(raw_obs_instances)
obs_instances.head()
```
%% Cell type:code id: tags:
```
runs = lags
#runs = lags.join(partitions.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])#.join(obs_instances.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])
#runs["failed"] = runs.apply(lambda row: (abs(row['instances'] - row['obs_instances']) / row['instances']) > 0.1, axis=1)
#runs.loc[runs['failed']==True]
```
%% Cell type:code id: tags:
```
#threshold = 1000
# A run is suitable if the slope of its lag trend line is below the threshold.
runs["suitable"] = runs.apply(lambda row: row['trend_slope'] < threshold, axis=1)
runs.columns = runs.columns.str.strip()
runs.sort_values(by=["dim_value", "instances"])
```
%% Cell type:code id: tags:
```
filtered = runs[runs['suitable']]
grouped = filtered.groupby(['dim_value'])['instances'].min()
min_suitable_instances = grouped.to_frame().reset_index()
min_suitable_instances
```
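%% Cell type:markdown id: tags:

For each tested load intensity (`dim_value`), this yields the smallest number of instances whose lag trend slope stays below `threshold`, i.e., the minimum resources required to process that load.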
%% Cell type:code id: tags:
```
min_suitable_instances.to_csv(os.path.join(directory_out, f'exp{exp_id}_min-suitable-instances.csv'), index=False)
```
%% Cell type:code id: tags:
```
min_suitable_instances.plot(kind='line',x='dim_value',y='instances')
# min_suitable_instances.plot(kind='line',x='dim_value',y='instances', logy=True)
plt.show()
```
@@ -153,11 +153,11 @@ declarations for different volume types.
 Using a [hostPath volume](https://kubernetes.io/docs/concepts/storage/volumes/#hostpath) is the easiest option when
 running Theodolite locally, e.g., with minikube or kind.
-Just modify `infrastructure/kubernetes/volumeSingle.yaml` by setting `path` to the directory on your host machine where
+Just modify `infrastructure/kubernetes/volume-hostpath.yaml` by setting `path` to the directory on your host machine where
 all benchmark results should be stored and run:
 ```sh
-kubectl apply -f infrastructure/kubernetes/volumeSingle.yaml
+kubectl apply -f infrastructure/kubernetes/volume-hostpath.yaml
 ```
##### *local* volume
@@ -166,12 +166,12 @@ A [local volume](https://kubernetes.io/docs/concepts/storage/volumes/#local) is
 access (e.g. via SSH) to one of your cluster nodes.
 You first need to create a directory on a selected node where all benchmark results should be stored. Next, modify
-`infrastructure/kubernetes/volumeCluster.yaml` by setting `<node-name>` to your selected node (this node will most
-likely also execute the job). Further, you have to set `path` to the directory on the node you just created. To deploy
+`infrastructure/kubernetes/volume-local.yaml` by setting `<node-name>` to your selected node. (This node will most
+likely also execute the [Theodolite job](#Execution).) Further, you have to set `path` to the directory on the node you just created. To deploy
 your volume run:
 ```sh
-kubectl apply -f infrastructure/kubernetes/volumeCluster.yaml
+kubectl apply -f infrastructure/kubernetes/volume-local.yaml
 ```
##### Other volumes
@@ -11,7 +11,7 @@ spec:
         claimName: theodolite-pv-claim
       containers:
       - name: theodolite
-        image: bvonheid/theodolite:latest
+        image: ghcr.io/cau-se/theodolite:latest
         # imagePullPolicy: Never # Used to pull "own" local image
         env:
         - name: UC # mandatory