From 7bfa849bf77c10d0339b027c27703093e27beb1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B6ren=20Henning?= <post@soeren-henning.de> Date: Thu, 30 Apr 2020 17:49:23 +0200 Subject: [PATCH] Remove partition and instances integration for now --- execution/scalability-graph.ipynb | 35 ++++++++++++++++++------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/execution/scalability-graph.ipynb b/execution/scalability-graph.ipynb index 5d12975ac..401cfae09 100644 --- a/execution/scalability-graph.ipynb +++ b/execution/scalability-graph.ipynb @@ -38,10 +38,11 @@ "metadata": {}, "outputs": [], "source": [ - "exp_id = 1005\n", + "exp_id = 1009\n", "warmup_sec = 60\n", "warmup_partitions_sec = 120\n", - "threshold = 2000 #slope\n" + "threshold = 2000 #slope\n", + "directory = './results-final'\n" ] }, { @@ -61,14 +62,14 @@ "\n", "raw_runs = []\n", "\n", - "filenames = [filename for filename in os.listdir('.') if filename.startswith(f\"exp{exp_id}\") and filename.endswith(\"totallag.csv\")]\n", + "filenames = [filename for filename in os.listdir(directory) if filename.startswith(f\"exp{exp_id}\") and filename.endswith(\"totallag.csv\")]\n", "for filename in filenames:\n", " #print(filename)\n", " run_params = filename[:-4].split(\"_\")\n", " dim_value = run_params[2]\n", " instances = run_params[3]\n", "\n", - " df = pd.read_csv(filename)\n", + " df = pd.read_csv(os.path.join(directory, filename))\n", " #input = df.loc[df['topic'] == \"input\"]\n", " input = df\n", " #print(input)\n", @@ -103,7 +104,7 @@ "metadata": {}, "outputs": [], "source": [ - "runs.head()" + "lags.head()" ] }, { @@ -115,14 +116,14 @@ "\n", "raw_partitions = []\n", "\n", - "filenames = [filename for filename in os.listdir('.') if filename.startswith(f\"exp{exp_id}\") and filename.endswith(\"partitions.csv\")]\n", + "filenames = [filename for filename in os.listdir(directory) if filename.startswith(f\"exp{exp_id}\") and filename.endswith(\"partitions.csv\")]\n", "for filename in 
filenames:\n", " #print(filename)\n", " run_params = filename[:-4].split(\"_\")\n", " dim_value = run_params[2]\n", " instances = run_params[3]\n", "\n", - " df = pd.read_csv(filename)\n", + " df = pd.read_csv(os.path.join(directory, filename))\n", " #input = df.loc[df['topic'] == \"input\"]\n", " input = df\n", " #print(input)\n", @@ -146,7 +147,7 @@ "\n", "partitions = pd.DataFrame(raw_partitions)\n", "\n", - "runs = lags.join(partitions.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])" + "#runs = lags.join(partitions.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])" ] }, { @@ -157,14 +158,17 @@ "source": [ "raw_obs_instances = []\n", "\n", - "filenames = [filename for filename in os.listdir('.') if filename.startswith(f\"exp{exp_id}\") and filename.endswith(\"instances.csv\")]\n", + "filenames = [filename for filename in os.listdir(directory) if filename.startswith(f\"exp{exp_id}\") and filename.endswith(\"instances.csv\")]\n", "for filename in filenames:\n", - " #print(filename)\n", " run_params = filename[:-4].split(\"_\")\n", " dim_value = run_params[2]\n", " instances = run_params[3]\n", "\n", - " df = pd.read_csv(filename)\n", + " df = pd.read_csv(os.path.join(directory, filename))\n", + "\n", + " if df.empty:\n", + " continue\n", + "\n", " #input = df.loc[df['topic'] == \"input\"]\n", " input = df\n", " #print(input)\n", @@ -188,7 +192,7 @@ "\n", "obs_instances = pd.DataFrame(raw_obs_instances)\n", "\n", - "#obs_instances.head()" + "obs_instances.head()" ] }, { @@ -197,11 +201,12 @@ "metadata": {}, "outputs": [], "source": [ - "runs = lags.join(partitions.set_index(['dim_value', 'instances']), on=['dim_value', 'instances']).join(obs_instances.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])\n", + "runs = lags\n", + "#runs = lags.join(partitions.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])#.join(obs_instances.set_index(['dim_value', 'instances']), on=['dim_value', 
'instances'])\n", "\n", - "runs[\"failed\"] = runs.apply(lambda row: (abs(row['instances'] - row['obs_instances']) / row['instances']) > 0.1, axis=1)\n", + "#runs[\"failed\"] = runs.apply(lambda row: (abs(row['instances'] - row['obs_instances']) / row['instances']) > 0.1, axis=1)\n", "\n", - "runs.loc[runs['failed']==True]" + "#runs.loc[runs['failed']==True]" ] }, { -- GitLab