From 1e8e383ffc12412ffc9418fff0bad35327e26990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B6ren=20Henning?= <soeren.henning@email.uni-kiel.de> Date: Thu, 14 Jan 2021 15:55:37 +0100 Subject: [PATCH] Add demand metric notebook --- analysis/demand-metric.ipynb | 117 +++++++++++++++++++++++++++++++++++ analysis/src/demand.py | 59 ++++++++++++++++++ 2 files changed, 176 insertions(+) create mode 100644 analysis/demand-metric.ipynb create mode 100644 analysis/src/demand.py diff --git a/analysis/demand-metric.ipynb b/analysis/demand-metric.ipynb new file mode 100644 index 000000000..c86f117dd --- /dev/null +++ b/analysis/demand-metric.ipynb @@ -0,0 +1,117 @@ +{ + "cells": [ + { + "source": [ + "# Theodolite Analysis - Demand Metric\n", + "\n", + "This notebook applies Theodolite's *demand* metric to describe scalability of a SUT based on Theodolite measurement data.\n", + "\n", + "Theodolite's *demand* metric is a function, mapping load intensities to the minimum resources (e.g., instances) that are required to process this load. With this notebook, the *demand* metric function is approximated by a map of tested load intensities to their minimum required resources.\n", + "\n", + "The final output when running this notebook will be a CSV file, providing this mapping. It can be used to create nice plots of a system's scalability using the `demand-metric-plot.ipynb` notebook." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "In the following cell, we need to specify:\n", + "\n", + "* `exp_id`: The experiment id that is to be analyzed.\n", + "* `warmup_sec`: The number of seconds which are to be ignored in the beginning of each experiment.\n", + "* `max_lag_trend_slope`: The maximum tolerable increase in queued messages per second.\n", + "* `measurement_dir`: The directory where the measurement data files are to be found.\n", + "* `results_dir`: The directory where the computed demand CSV files are to be stored."
+ ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "exp_id = 200\n", + "warmup_sec = 60\n", + "max_lag_trend_slope = 2000\n", + "measurement_dir = '<path-to>/results'\n", + "results_dir = '<path-to>/results-inst'\n" + ] + }, + { + "source": [ + "With the following call, we compute our demand mapping." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from src.demand import demand\n", + "\n", + "demand = demand(exp_id, measurement_dir, max_lag_trend_slope, warmup_sec)" + ] + }, + { + "source": [ + "We might already want to plot a simple visualization here:" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "demand.plot(kind='line',x='load',y='resources')" + ] + }, + { + "source": [ + "Finally we store the results in a CSV file."
import os

import numpy as np
import pandas as pd


def demand(exp_id, directory, threshold, warmup_sec):
    """Approximate Theodolite's *demand* metric from measurement files.

    Scans ``directory`` for files named ``exp<exp_id>_..._totallag.csv``,
    fits a least-squares line to each run's consumer-lag samples (after
    discarding the warm-up period), marks a run as *suitable* when its lag
    trend slope stays below ``threshold``, and returns the minimum number of
    suitable instances per tested load intensity.

    :param exp_id: Experiment id whose result files are to be analyzed.
    :param directory: Directory containing the ``*totallag.csv`` files.
    :param threshold: Maximum tolerable lag trend slope (SLO) for a run to
        count as suitable.
    :param warmup_sec: Seconds at the beginning of each run to exclude from
        the regression.
    :return: ``DataFrame`` with columns ``load`` and ``resources`` — the
        minimum suitable resources per load intensity. Empty (with the same
        columns) when no matching files exist.
    """
    raw_runs = []

    # Compute the service level (lag trend slope) for each tested configuration.
    filenames = [
        filename
        for filename in os.listdir(directory)
        if filename.startswith(f"exp{exp_id}") and filename.endswith("totallag.csv")
    ]
    for filename in filenames:
        # File names look like "exp<id>_<benchmark>_<load>_<instances>_totallag.csv".
        run_params = filename[:-4].split("_")
        dim_value = run_params[2]   # load intensity of this run
        instances = run_params[3]   # number of instances used in this run

        df = pd.read_csv(os.path.join(directory, filename))
        # Seconds elapsed since the first sample of the run.
        df['sec_start'] = df.loc[0:, 'timestamp'] - df.iloc[0]['timestamp']
        # Exclude the warm-up period from the trend fit.
        regress = df.loc[df['sec_start'] >= warmup_sec]

        # Regression inputs are addressed positionally, matching the
        # measurement file layout — assumes columns 2/3 are the value and the
        # derived sec_start column; TODO confirm against the data exporter.
        x = regress.iloc[:, 2].values
        y = regress.iloc[:, 3].values
        # Ordinary least-squares line fit; the degree-1 polyfit slope is
        # identical to sklearn LinearRegression's coef_[0][0].
        trend_slope = float(np.polyfit(x, y, 1)[0])

        raw_runs.append({
            'load': int(dim_value),
            'resources': int(instances),
            'trend_slope': trend_slope,
        })

    if not raw_runs:
        # No matching measurement files: return an empty, well-formed result
        # instead of raising KeyError on a column-less DataFrame.
        return pd.DataFrame(columns=['load', 'resources'])

    runs = pd.DataFrame(raw_runs)

    # A run meets the SLO iff its lag trend stays below the threshold.
    runs['suitable'] = runs['trend_slope'] < threshold

    # Keep the table ordered by load, then resources (the original computed
    # this sort but discarded the result).
    runs = runs.sort_values(by=['load', 'resources'])

    # Minimum suitable resources per load intensity.
    filtered = runs[runs['suitable']]
    grouped = filtered.groupby(['load'])['resources'].min()
    return grouped.to_frame().reset_index()