From 964607dadc5c329f174879242c122c823c79f75c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=B6ren=20Henning?= <soeren.henning@email.uni-kiel.de>
Date: Thu, 14 Jan 2021 16:36:35 +0100
Subject: [PATCH] Add demand metric plotting notebook

---
 analysis/README.md                |   7 +-
 analysis/demand-metric-plot.ipynb | 173 ++++++++++++++++++++++++++++++
 analysis/demand-metric.ipynb      |   4 +-
 3 files changed, 181 insertions(+), 3 deletions(-)
 create mode 100644 analysis/demand-metric-plot.ipynb

diff --git a/analysis/README.md b/analysis/README.md
index 5fc0179bf..0e32b4493 100644
--- a/analysis/README.md
+++ b/analysis/README.md
@@ -3,13 +3,18 @@
 This directory contains Jupyter notebooks for analyzing and visualizing
 benchmark execution results and plotting. The following notebooks are provided:
 
+* [demand-metric.ipynb](demand-metric.ipynb): Create CSV files describing scalability according to the Theodolite `demand` metric.
+* [demand-metric-plot.ipynb](demand-metric-plot.ipynb): Create plots based on such CSV files.
+
+For legacy reasons, we also provide the following notebooks, which, however, are not documented:
+
 * [scalability-graph.ipynb](scalability-graph.ipynb): Creates a scalability graph for a certain benchmark execution.
 * [scalability-graph-final.ipynb](scalability-graph-final.ipynb): Combines the scalability graphs of multiple benchmarks executions (e.g. for comparing different configuration).
 * [lag-trend-graph.ipynb](lag-trend-graph.ipynb): Visualizes the consumer lag evaluation over time along with the computed trend.
 
 ## Usage
 
-Basically, the Theodolite Analysis Jupyter notebooks should be runnable by any Jupyter server. To make it a bit easier,
+In general, the Theodolite Analysis Jupyter notebooks should be runnable by any Jupyter server. To make it a bit easier,
 we provide introductions for running notebooks with Docker and with Visual Studio Code. These intoduction may also be
 a good starting point for using another service.
 
diff --git a/analysis/demand-metric-plot.ipynb b/analysis/demand-metric-plot.ipynb
new file mode 100644
index 000000000..95f371510
--- /dev/null
+++ b/analysis/demand-metric-plot.ipynb
@@ -0,0 +1,173 @@
+{
+ "cells": [
+  {
+   "source": [
+    "# Theodolite Analysis - Plotting the Demand Metric\n",
+    "\n",
+    "This notebook creates a plot, showing scalability as a function that maps load intensities to the resources required for processing them. It is able to combine multiple such plots in one figure, for example, to compare multiple systems or configurations.\n",
+    "\n",
+    "The notebook takes a CSV file for each plot mapping load intensities to minimum required resources, computed by the `demand-metric.ipynb` notebook."
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "source": [
+    "First, we need to import some libraries, which are required for creating the plots."
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import pandas as pd\n",
+    "from functools import reduce\n",
+    "import matplotlib.pyplot as plt\n",
+    "from matplotlib.ticker import FuncFormatter\n",
+    "from matplotlib.ticker import MaxNLocator"
+   ]
+  },
+  {
+   "source": [
+    "We need to specify the directory where the demand CSV files can be found and a dictionary that maps a system description (e.g. its name) to the prefix of the corresponding CSV file (`<prefix>_demand.csv`)."
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results_dir = '<path-to>/results'\n",
+    "\n",
+    "experiments = {\n",
+    "    'System XYZ': 'exp200',\n",
+    "}\n"
+   ]
+  },
+  {
+   "source": [
+    "Now, we combine all systems described in `experiments`."
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataframes = [pd.read_csv(os.path.join(results_dir, f'{v}_demand.csv')).set_index('load').rename(columns={\"resources\": k}) for k, v in experiments.items()]\n",
+    "\n",
+    "df = reduce(lambda df1,df2: df1.join(df2,how='outer'), dataframes)"
+   ]
+  },
+  {
+   "source": [
+    "We might want to display the mappings before we plot them."
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "source": [
+    "The following code creates a Matplotlib figure showing the scalability plots for all specified systems. You might want to adjust its styling etc. according to your preferences. Make sure to also set a filename."
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.style.use('ggplot')\n",
+    "plt.rcParams['axes.facecolor']='w'\n",
+    "plt.rcParams['axes.edgecolor']='555555'\n",
+    "#plt.rcParams['ytick.color']='black'\n",
+    "plt.rcParams['grid.color']='dddddd'\n",
+    "plt.rcParams['axes.spines.top']='false'\n",
+    "plt.rcParams['axes.spines.right']='false'\n",
+    "plt.rcParams['legend.frameon']='true'\n",
+    "plt.rcParams['legend.framealpha']='1'\n",
+    "plt.rcParams['legend.edgecolor']='1'\n",
+    "plt.rcParams['legend.borderpad']='1'\n",
+    "\n",
+    "@FuncFormatter\n",
+    "def load_formatter(x, pos):\n",
+    "    return f'{(x/1000):.0f}k'\n",
+    "\n",
+    "markers = ['s', 'D', 'o', 'v', '^', '<', '>', 'p', 'X']\n",
+    "\n",
+    "def splitSerToArr(ser):  # Split a pandas Series into [index, values]; to_numpy() replaces as_matrix(), removed in pandas 1.0\n",
+    "    return [ser.index, ser.to_numpy()]\n",
+    "\n",
+    "plt.figure()\n",
+    "#plt.figure(figsize=(4.8, 3.6)) # For other plot sizes\n",
+    "#ax = df.plot(kind='line', marker='o')\n",
+    "for i, column in enumerate(df):\n",
+    "    plt.plot(df[column].dropna(), marker=markers[i], label=column)\n",
+    "plt.legend()\n",
+    "ax = plt.gca()\n",
+    "#ax = df.plot(kind='line',x='dim_value', legend=False, use_index=True)\n",
+    "ax.set_ylabel('number of instances')\n",
+    "ax.set_xlabel('messages/second')\n",
+    "ax.set_ylim(ymin=0)\n",
+    "#ax.set_xlim(xmin=0)\n",
+    "ax.yaxis.set_major_locator(MaxNLocator(integer=True))\n",
+    "ax.xaxis.set_major_formatter(FuncFormatter(load_formatter))\n",
+    "\n",
+    "plt.savefig('temp.pdf', bbox_inches='tight')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "version": "3.8.5-final"
+  },
+  "orig_nbformat": 2,
+  "file_extension": ".py",
+  "mimetype": "text/x-python",
+  "name": "python",
+  "npconvert_exporter": "python",
+  "pygments_lexer": "ipython3",
+  "version": 3,
+  "kernelspec": {
+   "name": "python37064bitvenvvenv6c432ee1239d4f3cb23f871068b0267d",
+   "display_name": "Python 3.7.0 64-bit ('.venv': venv)",
+   "language": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
\ No newline at end of file
diff --git a/analysis/demand-metric.ipynb b/analysis/demand-metric.ipynb
index c86f117dd..78afd0f3a 100644
--- a/analysis/demand-metric.ipynb
+++ b/analysis/demand-metric.ipynb
@@ -35,8 +35,8 @@
     "exp_id = 200\n",
     "warmup_sec = 60\n",
     "max_lag_trend_slope = 2000\n",
-    "directory = '<path-to>/results'\n",
-    "results_dir = '<path-to>/results-inst'\n"
+    "measurement_dir = '<path-to>/measurements'\n",
+    "results_dir = '<path-to>/results'\n"
    ]
   },
   {
-- 
GitLab