Merge branch 'master' into feature/127-zookeeper-communication

d6736e05 · Lorenz Boguhn · bdeadef8 · d0299a1d · d6736e05 · d6736e05
Commit d6736e05 authored 4 years ago by Lorenz Boguhn
--- a/analysis/scalability-graph.ipynb
+++ b/analysis/scalability-graph.ipynb
@@ -245,7 +245,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "min_suitable_instances.to_csv(os.path.join(directory_out, f'../results-inst/exp{exp_id}_min-suitable-instances.csv'), index=False)"
+    "min_suitable_instances.to_csv(os.path.join(directory_out, f'exp{exp_id}_min-suitable-instances.csv'), index=False)"
   ]
  },
  {

 %% Cell type:code id: tags:
 ``` 
 # Print a fixed greeting string.
 print("hello")
 ```
 %% Cell type:code id: tags:
 ``` 
 import os
 from datetime import datetime, timedelta, timezone
 import pandas as pd
 from sklearn.linear_model import LinearRegression
 import matplotlib.pyplot as plt
 ```
 %% Cell type:code id: tags:
 ``` 
 # Evaluate the current working directory of the notebook process.
 os.getcwd()
 ```
 %% Cell type:code id: tags:
 ``` 
 # Settings shared by the analysis cells below.
 exp_id = 2012  # only files whose name starts with f"exp{exp_id}" are loaded
 warmup_sec = 60  # rows with sec_start below this value are dropped before analysis
 warmup_partitions_sec = 120  # NOTE(review): not referenced by any cell shown in this notebook
 threshold = 2000  # slope limit: runs with trend_slope below this are marked suitable
 #directory = '../results'
 directory = '<path-to>/results'  # folder scanned for the input CSV files
 directory_out = '<path-to>/results-inst'  # folder the min-suitable-instances CSV is written to
 ```
 %% Cell type:code id: tags:outputPrepend,outputPrepend
 ``` 
 # Fit a linear trend to every "exp<exp_id>*totallag.csv" file in `directory`
 # and collect one row per file holding the slope of that trend.
 raw_runs = []
 filenames = [
     filename
     for filename in os.listdir(directory)
     if filename.startswith(f"exp{exp_id}") and filename.endswith("totallag.csv")
 ]
 for filename in filenames:
    # The file name (without ".csv") is split on "_"; fields 2 and 3 are
    # read as the dimension value and the instance count.
    run_params = filename[:-4].split("_")
    dim_value = run_params[2]
    instances = run_params[3]
    lag_df = pd.read_csv(os.path.join(directory, filename))
    # Offset of every sample from the first sample's timestamp.
    lag_df['sec_start'] = lag_df['timestamp'] - lag_df.iloc[0]['timestamp']
    regress = lag_df.loc[lag_df['sec_start'] >= warmup_sec]  # drop warm-up samples
    # NOTE(review): X and Y are picked by column position (2 and 3) after
    # 'sec_start' has been appended; which named columns these are depends
    # on the CSV layout - confirm against the files.
    X = regress.iloc[:, 2].values.reshape(-1, 1)  # column vector
    Y = regress.iloc[:, 3].values.reshape(-1, 1)  # column vector
    linear_regressor = LinearRegression()
    linear_regressor.fit(X, Y)
    trend_slope = linear_regressor.coef_[0][0]
    row = {'dim_value': int(dim_value), 'instances': int(instances), 'trend_slope': trend_slope}
    raw_runs.append(row)
 lags = pd.DataFrame(raw_runs)
 ```
 %% Cell type:code id: tags:
 ``` 
 # Preview the first rows of the per-run slope table built above.
 lags.head()
 ```
 %% Cell type:code id: tags:
 ``` 
 # Average the 'value' column of the "input" topic for every
 # "exp<exp_id>*partitions.csv" file in `directory`, ignoring warm-up rows.
 raw_partitions = []
 filenames = [
     filename
     for filename in os.listdir(directory)
     if filename.startswith(f"exp{exp_id}") and filename.endswith("partitions.csv")
 ]
 for filename in filenames:
    # The file name (without ".csv") is split on "_"; fields 2 and 3 are
    # read as the dimension value and the instance count.
    run_params = filename[:-4].split("_")
    dim_value = run_params[2]
    instances = run_params[3]
    partitions_df = pd.read_csv(os.path.join(directory, filename))
    # Offset of every sample from the first sample's timestamp.
    partitions_df['sec_start'] = partitions_df['timestamp'] - partitions_df.iloc[0]['timestamp']
    # NOTE(review): warmup_partitions_sec is defined in the settings cell but
    # never used; confirm whether it was meant here instead of warmup_sec.
    partitions_df = partitions_df.loc[partitions_df['sec_start'] >= warmup_sec]  # drop warm-up samples
    # Exact match on the topic name. The previous `>=` was a lexicographic
    # string comparison and also kept every topic sorting after "input".
    partitions_df = partitions_df.loc[partitions_df['topic'] == 'input']
    mean = partitions_df['value'].mean()
    row = {'dim_value': int(dim_value), 'instances': int(instances), 'partitions': mean}
    raw_partitions.append(row)
 partitions = pd.DataFrame(raw_partitions)
 #runs = lags.join(partitions.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])
 ```
 %% Cell type:code id: tags:
 ``` 
 # Average the observed 'value' column for every "exp<exp_id>*instances.csv"
 # file in `directory`, ignoring warm-up rows. Empty files are skipped.
 raw_obs_instances = []
 filenames = [
     filename
     for filename in os.listdir(directory)
     if filename.startswith(f"exp{exp_id}") and filename.endswith("instances.csv")
 ]
 for filename in filenames:
    # The file name (without ".csv") is split on "_"; fields 2 and 3 are
    # read as the dimension value and the instance count.
    run_params = filename[:-4].split("_")
    dim_value = run_params[2]
    instances = run_params[3]
    instances_df = pd.read_csv(os.path.join(directory, filename))
    if instances_df.empty:
        continue
    # Offset of every sample from the first sample's timestamp.
    instances_df['sec_start'] = instances_df['timestamp'] - instances_df.iloc[0]['timestamp']
    instances_df = instances_df.loc[instances_df['sec_start'] >= warmup_sec]  # drop warm-up samples
    # Build the row inside this loop. Previously its construction was
    # commented out, so a stale `row` left over from an earlier cell was
    # appended (or a NameError was raised when no such cell had run).
    # Assumes these CSVs carry a 'value' column like the partitions files -
    # TODO confirm.
    mean = instances_df['value'].mean()
    row = {'dim_value': int(dim_value), 'instances': int(instances), 'obs_instances': mean}
    raw_obs_instances.append(row)
 obs_instances = pd.DataFrame(raw_obs_instances)
 obs_instances.head()
 ```
 %% Cell type:code id: tags:
 ``` 
 # `runs` is bound to the same DataFrame object as `lags` (no copy is made),
 # so columns added to `runs` in later cells also appear on `lags`.
 runs = lags
 # Disabled: join with the partitions / observed-instances tables and flag
 # runs whose observed instance count deviates by more than 10 %.
 #runs = lags.join(partitions.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])#.join(obs_instances.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])
 #runs["failed"] = runs.apply(lambda row: (abs(row['instances'] - row['obs_instances']) / row['instances']) > 0.1, axis=1)
 #runs.loc[runs['failed']==True]
 ```
 %% Cell type:code id: tags:
 ``` 
 # Mark a run as suitable when the slope of its trend line is below
 # `threshold`. The comparison is done column-wise instead of row by row.
 runs["suitable"] = runs['trend_slope'] < threshold
 runs.columns = runs.columns.str.strip()
 # Last expression of the cell: shows the runs ordered by dimension value and
 # instance count (the sorted copy is displayed, `runs` itself is not reordered).
 runs.sort_values(by=["dim_value", "instances"])
 ```
 %% Cell type:code id: tags:
 ``` 
 # For every dimension value, find the smallest instance count among the runs
 # marked suitable. The boolean column is used directly as the row mask; a
 # row-wise apply is not needed and does not yield a usable mask on an empty frame.
 filtered = runs[runs['suitable']]
 grouped = filtered.groupby(['dim_value'])['instances'].min()
 min_suitable_instances = grouped.to_frame().reset_index()
 min_suitable_instances
 ```
 %% Cell type:code id: tags:
 ``` 
-min_suitable_instances.to_csv(os.path.join(directory_out, f'../results-inst/exp{exp_id}_min-suitable-instances.csv'), index=False)
+min_suitable_instances.to_csv(os.path.join(directory_out, f'exp{exp_id}_min-suitable-instances.csv'), index=False)
 ```
 %% Cell type:code id: tags:
 ``` 
 # Line plot of the minimal suitable instance count over the dimension value.
 min_suitable_instances.plot(kind='line',x='dim_value',y='instances')
 # Alternative with a logarithmic y axis:
 # min_suitable_instances.plot(kind='line',x='dim_value',y='instances', logy=True)
 plt.show()
 ```
 %% Cell type:code id: tags:
 ``` 
 ```

--- a/execution/README.md
+++ b/execution/README.md
@@ -153,11 +153,11 @@ declarations for different volume types.
 Using a [hostPath volume](https://kubernetes.io/docs/concepts/storage/volumes/#hostpath) is the easiest option when
 running Theodolite locally, e.g., with minikube or kind.
-Just modify `infrastructure/kubernetes/volumeSingle.yaml` by setting `path` to the directory on your host machine where
+Just modify `infrastructure/kubernetes/volume-hostpath.yaml` by setting `path` to the directory on your host machine where
 all benchmark results should be stored and run:
 ```sh
-kubectl apply -f infrastructure/kubernetes/volumeSingle.yaml
+kubectl apply -f infrastructure/kubernetes/volume-hostpath.yaml
 ```
 ##### *local* volume
@@ -166,12 +166,12 @@ A [local volume](https://kubernetes.io/docs/concepts/storage/volumes/#local) is
 access (e.g. via SSH) to one of your cluster nodes.
 You first need to create a directory on a selected node where all benchmark results should be stored. Next, modify
-`infrastructure/kubernetes/volumeCluster.yaml` by setting `<node-name>` to your selected node (this node will most
+`infrastructure/kubernetes/volume-local.yaml` by setting `<node-name>` to your selected node. (This node will most
-likely also execute the job). Further, you have to set `path` to the directory on the node you just created. To deploy
+likely also execute the [Theodolite job](#Execution).) Further, you have to set `path` to the directory on the node you just created. To deploy
 your volume, run:
 ```sh
-kubectl apply -f infrastructure/kubernetes/volumeCluster.yaml
+kubectl apply -f infrastructure/kubernetes/volume-local.yaml
 ```
 ##### Other volumes

--- a/execution/infrastructure/kubernetes/volumeSingle.yaml
+++ b/execution/infrastructure/kubernetes/volumeSingle.yaml
--- a/execution/infrastructure/kubernetes/volumeCluster.yaml
+++ b/execution/infrastructure/kubernetes/volumeCluster.yaml
--- a/execution/theodolite.yaml
+++ b/execution/theodolite.yaml
@@ -11,7 +11,7 @@ spec:
          claimName: theodolite-pv-claim
      containers:
        - name: theodolite
-          image: bvonheid/theodolite:latest
+          image: ghcr.io/cau-se/theodolite:latest
          # imagePullPolicy: Never # Used to pull "own" local image
          env:
            - name: UC # mandatory