Skip to content
Snippets Groups Projects
Commit d6736e05 authored by Lorenz Boguhn's avatar Lorenz Boguhn
Browse files

Merge branch 'master' into feature/127-zookeeper-communication

parents bdeadef8 d0299a1d
No related branches found
No related tags found
5 merge requests: !159 Re-implementation of Theodolite with Kotlin/Quarkus, !157 Update Graal Image in CI pipeline, !83 WIP: Re-implementation of Theodolite with Kotlin/Quarkus, !79 Feature/127 zookeeper communication, !78 Resolve "Implement Quarkus/Kotlin protype"
This commit is part of merge request !78. Comments created here will be created in the context of that merge request.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
print("hello") print("hello")
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
import os import os
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
import pandas as pd import pandas as pd
from sklearn.linear_model import LinearRegression from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
os.getcwd() os.getcwd()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
exp_id = 2012 exp_id = 2012
warmup_sec = 60 warmup_sec = 60
warmup_partitions_sec = 120 warmup_partitions_sec = 120
threshold = 2000 #slope threshold = 2000 #slope
#directory = '../results' #directory = '../results'
directory = '<path-to>/results' directory = '<path-to>/results'
directory_out = '<path-to>/results-inst' directory_out = '<path-to>/results-inst'
``` ```
%% Cell type:code id: tags:outputPrepend,outputPrepend %% Cell type:code id: tags:outputPrepend,outputPrepend
``` ```
#exp_id = 35 #exp_id = 35
#os.chdir("./results-final") #os.chdir("./results-final")
raw_runs = [] raw_runs = []
filenames = [filename for filename in os.listdir(directory) if filename.startswith(f"exp{exp_id}") and filename.endswith("totallag.csv")] filenames = [filename for filename in os.listdir(directory) if filename.startswith(f"exp{exp_id}") and filename.endswith("totallag.csv")]
for filename in filenames: for filename in filenames:
#print(filename) #print(filename)
run_params = filename[:-4].split("_") run_params = filename[:-4].split("_")
dim_value = run_params[2] dim_value = run_params[2]
instances = run_params[3] instances = run_params[3]
df = pd.read_csv(os.path.join(directory, filename)) df = pd.read_csv(os.path.join(directory, filename))
#input = df.loc[df['topic'] == "input"] #input = df.loc[df['topic'] == "input"]
input = df input = df
#print(input) #print(input)
input['sec_start'] = input.loc[0:, 'timestamp'] - input.iloc[0]['timestamp'] input['sec_start'] = input.loc[0:, 'timestamp'] - input.iloc[0]['timestamp']
#print(input) #print(input)
#print(input.iloc[0, 'timestamp']) #print(input.iloc[0, 'timestamp'])
regress = input.loc[input['sec_start'] >= warmup_sec] # Warm-Up regress = input.loc[input['sec_start'] >= warmup_sec] # Warm-Up
#regress = input #regress = input
#input.plot(kind='line',x='timestamp',y='value',color='red') #input.plot(kind='line',x='timestamp',y='value',color='red')
#plt.show() #plt.show()
X = regress.iloc[:, 2].values.reshape(-1, 1) # values converts it into a numpy array X = regress.iloc[:, 2].values.reshape(-1, 1) # values converts it into a numpy array
Y = regress.iloc[:, 3].values.reshape(-1, 1) # -1 means that calculate the dimension of rows, but have 1 column Y = regress.iloc[:, 3].values.reshape(-1, 1) # -1 means that calculate the dimension of rows, but have 1 column
linear_regressor = LinearRegression() # create object for the class linear_regressor = LinearRegression() # create object for the class
linear_regressor.fit(X, Y) # perform linear regression linear_regressor.fit(X, Y) # perform linear regression
Y_pred = linear_regressor.predict(X) # make predictions Y_pred = linear_regressor.predict(X) # make predictions
trend_slope = linear_regressor.coef_[0][0] trend_slope = linear_regressor.coef_[0][0]
#print(linear_regressor.coef_) #print(linear_regressor.coef_)
row = {'dim_value': int(dim_value), 'instances': int(instances), 'trend_slope': trend_slope} row = {'dim_value': int(dim_value), 'instances': int(instances), 'trend_slope': trend_slope}
#print(row) #print(row)
raw_runs.append(row) raw_runs.append(row)
lags = pd.DataFrame(raw_runs) lags = pd.DataFrame(raw_runs)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
lags.head() lags.head()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
raw_partitions = [] raw_partitions = []
filenames = [filename for filename in os.listdir(directory) if filename.startswith(f"exp{exp_id}") and filename.endswith("partitions.csv")] filenames = [filename for filename in os.listdir(directory) if filename.startswith(f"exp{exp_id}") and filename.endswith("partitions.csv")]
for filename in filenames: for filename in filenames:
#print(filename) #print(filename)
run_params = filename[:-4].split("_") run_params = filename[:-4].split("_")
dim_value = run_params[2] dim_value = run_params[2]
instances = run_params[3] instances = run_params[3]
df = pd.read_csv(os.path.join(directory, filename)) df = pd.read_csv(os.path.join(directory, filename))
#input = df.loc[df['topic'] == "input"] #input = df.loc[df['topic'] == "input"]
input = df input = df
#print(input) #print(input)
input['sec_start'] = input.loc[0:, 'timestamp'] - input.iloc[0]['timestamp'] input['sec_start'] = input.loc[0:, 'timestamp'] - input.iloc[0]['timestamp']
#print(input) #print(input)
#print(input.iloc[0, 'timestamp']) #print(input.iloc[0, 'timestamp'])
input = input.loc[input['sec_start'] >= warmup_sec] # Warm-Up input = input.loc[input['sec_start'] >= warmup_sec] # Warm-Up
#regress = input #regress = input
input = input.loc[input['topic'] >= 'input'] input = input.loc[input['topic'] >= 'input']
mean = input['value'].mean() mean = input['value'].mean()
#input.plot(kind='line',x='timestamp',y='value',color='red') #input.plot(kind='line',x='timestamp',y='value',color='red')
#plt.show() #plt.show()
row = {'dim_value': int(dim_value), 'instances': int(instances), 'partitions': mean} row = {'dim_value': int(dim_value), 'instances': int(instances), 'partitions': mean}
#print(row) #print(row)
raw_partitions.append(row) raw_partitions.append(row)
partitions = pd.DataFrame(raw_partitions) partitions = pd.DataFrame(raw_partitions)
#runs = lags.join(partitions.set_index(['dim_value', 'instances']), on=['dim_value', 'instances']) #runs = lags.join(partitions.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
raw_obs_instances = [] raw_obs_instances = []
filenames = [filename for filename in os.listdir(directory) if filename.startswith(f"exp{exp_id}") and filename.endswith("instances.csv")] filenames = [filename for filename in os.listdir(directory) if filename.startswith(f"exp{exp_id}") and filename.endswith("instances.csv")]
for filename in filenames: for filename in filenames:
run_params = filename[:-4].split("_") run_params = filename[:-4].split("_")
dim_value = run_params[2] dim_value = run_params[2]
instances = run_params[3] instances = run_params[3]
df = pd.read_csv(os.path.join(directory, filename)) df = pd.read_csv(os.path.join(directory, filename))
if df.empty: if df.empty:
continue continue
#input = df.loc[df['topic'] == "input"] #input = df.loc[df['topic'] == "input"]
input = df input = df
#print(input) #print(input)
input['sec_start'] = input.loc[0:, 'timestamp'] - input.iloc[0]['timestamp'] input['sec_start'] = input.loc[0:, 'timestamp'] - input.iloc[0]['timestamp']
#print(input) #print(input)
#print(input.iloc[0, 'timestamp']) #print(input.iloc[0, 'timestamp'])
input = input.loc[input['sec_start'] >= warmup_sec] # Warm-Up input = input.loc[input['sec_start'] >= warmup_sec] # Warm-Up
#regress = input #regress = input
#input = input.loc[input['topic'] >= 'input'] #input = input.loc[input['topic'] >= 'input']
#mean = input['value'].mean() #mean = input['value'].mean()
#input.plot(kind='line',x='timestamp',y='value',color='red') #input.plot(kind='line',x='timestamp',y='value',color='red')
#plt.show() #plt.show()
#row = {'dim_value': int(dim_value), 'instances': int(instances), 'obs_instances': mean} #row = {'dim_value': int(dim_value), 'instances': int(instances), 'obs_instances': mean}
#print(row) #print(row)
raw_obs_instances.append(row) raw_obs_instances.append(row)
obs_instances = pd.DataFrame(raw_obs_instances) obs_instances = pd.DataFrame(raw_obs_instances)
obs_instances.head() obs_instances.head()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
runs = lags runs = lags
#runs = lags.join(partitions.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])#.join(obs_instances.set_index(['dim_value', 'instances']), on=['dim_value', 'instances']) #runs = lags.join(partitions.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])#.join(obs_instances.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])
#runs["failed"] = runs.apply(lambda row: (abs(row['instances'] - row['obs_instances']) / row['instances']) > 0.1, axis=1) #runs["failed"] = runs.apply(lambda row: (abs(row['instances'] - row['obs_instances']) / row['instances']) > 0.1, axis=1)
#runs.loc[runs['failed']==True] #runs.loc[runs['failed']==True]
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
#threshold = 1000 #threshold = 1000
# Set to true if the trend line has a slope less than the threshold # Set to true if the trend line has a slope less than the threshold
runs["suitable"] = runs.apply(lambda row: row['trend_slope'] < threshold, axis=1) runs["suitable"] = runs.apply(lambda row: row['trend_slope'] < threshold, axis=1)
runs.columns = runs.columns.str.strip() runs.columns = runs.columns.str.strip()
runs.sort_values(by=["dim_value", "instances"]) runs.sort_values(by=["dim_value", "instances"])
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
filtered = runs[runs.apply(lambda x: x['suitable'], axis=1)] filtered = runs[runs.apply(lambda x: x['suitable'], axis=1)]
grouped = filtered.groupby(['dim_value'])['instances'].min() grouped = filtered.groupby(['dim_value'])['instances'].min()
min_suitable_instances = grouped.to_frame().reset_index() min_suitable_instances = grouped.to_frame().reset_index()
min_suitable_instances min_suitable_instances
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
min_suitable_instances.to_csv(os.path.join(directory_out, f'../results-inst/exp{exp_id}_min-suitable-instances.csv'), index=False) min_suitable_instances.to_csv(os.path.join(directory_out, f'exp{exp_id}_min-suitable-instances.csv'), index=False)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
min_suitable_instances.plot(kind='line',x='dim_value',y='instances') min_suitable_instances.plot(kind='line',x='dim_value',y='instances')
# min_suitable_instances.plot(kind='line',x='dim_value',y='instances', logy=True) # min_suitable_instances.plot(kind='line',x='dim_value',y='instances', logy=True)
plt.show() plt.show()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
``` ```
......
...@@ -153,11 +153,11 @@ declarations for different volume types. ...@@ -153,11 +153,11 @@ declarations for different volume types.
Using a [hostPath volume](https://kubernetes.io/docs/concepts/storage/volumes/#hostpath) is the easiest option when Using a [hostPath volume](https://kubernetes.io/docs/concepts/storage/volumes/#hostpath) is the easiest option when
running Theodolite locally, e.g., with minikube or kind. running Theodolite locally, e.g., with minikube or kind.
Just modify `infrastructure/kubernetes/volumeSingle.yaml` by setting `path` to the directory on your host machine where Just modify `infrastructure/kubernetes/volume-hostpath.yaml` by setting `path` to the directory on your host machine where
all benchmark results should be stored and run: all benchmark results should be stored and run:
```sh ```sh
kubectl apply -f infrastructure/kubernetes/volumeSingle.yaml kubectl apply -f infrastructure/kubernetes/volume-hostpath.yaml
``` ```
##### *local* volume ##### *local* volume
...@@ -166,12 +166,12 @@ A [local volume](https://kubernetes.io/docs/concepts/storage/volumes/#local) is ...@@ -166,12 +166,12 @@ A [local volume](https://kubernetes.io/docs/concepts/storage/volumes/#local) is
access (e.g. via SSH) to one of your cluster nodes. access (e.g. via SSH) to one of your cluster nodes.
You first need to create a directory on a selected node where all benchmark results should be stored. Next, modify You first need to create a directory on a selected node where all benchmark results should be stored. Next, modify
`infrastructure/kubernetes/volumeCluster.yaml` by setting `<node-name>` to your selected node (this node will most `infrastructure/kubernetes/volume-local.yaml` by setting `<node-name>` to your selected node. (This node will most
likely also execute the job). Further, you have to set `path` to the directory on the node you just created. To deploy likely also execute the [Theodolite job](#Execution).) Further, you have to set `path` to the directory on the node you just created. To deploy
your volume run: your volume run:
```sh ```sh
kubectl apply -f infrastructure/kubernetes/volumeCluster.yaml kubectl apply -f infrastructure/kubernetes/volume-local.yaml
``` ```
##### Other volumes ##### Other volumes
......
...@@ -11,7 +11,7 @@ spec: ...@@ -11,7 +11,7 @@ spec:
claimName: theodolite-pv-claim claimName: theodolite-pv-claim
containers: containers:
- name: theodolite - name: theodolite
image: bvonheid/theodolite:latest image: ghcr.io/cau-se/theodolite:latest
# imagePullPolicy: Never # Used to pull "own" local image # imagePullPolicy: Never # Used to pull "own" local image
env: env:
- name: UC # mandatory - name: UC # mandatory
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment