Skip to content
Snippets Groups Projects
Commit 6052e48e authored by Sören Henning
Browse files

Fix output path

parent 11f835ec
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
```
# Prints a fixed greeting; presumably a quick check that the kernel runs.
print("hello")
```
%% Cell type:code id: tags:
```
import os
from datetime import datetime, timedelta, timezone

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
```
%% Cell type:code id: tags:
```
# Show the current working directory as the cell output.
os.getcwd()
```
%% Cell type:code id: tags:
```
# Experiment configuration read by the analysis cells that follow.

# Only result files whose name starts with f"exp{exp_id}" are analysed.
exp_id = 2012
# Rows whose offset from the first timestamp of a run is below this value are
# discarded as warm-up (assumes timestamps are expressed in seconds -- confirm).
warmup_sec = 60
# NOTE(review): no cell visible here reads warmup_partitions_sec; confirm
# whether the partitions analysis was meant to use it instead of warmup_sec.
warmup_partitions_sec = 120
# A run is marked "suitable" when the slope of its lag trend is below this.
threshold = 2000
# directory = '../results'
# Directory containing the per-run CSV files.
directory = '<path-to>/results'
# Directory that receives the min-suitable-instances CSV.
directory_out = '<path-to>/results-inst'
```
%% Cell type:code id: tags:outputPrepend,outputPrepend
```
# Fit a linear trend to the total-lag series of every run of the experiment
# and collect one row per run: dim_value, instances and the trend slope.
raw_runs = []
filenames = [
    filename
    for filename in os.listdir(directory)
    if filename.startswith(f"exp{exp_id}") and filename.endswith("totallag.csv")
]
for filename in filenames:
    # The file name (minus its 4-character extension) is "_"-separated;
    # fields 2 and 3 are converted with int() when the row is built.
    run_params = filename[:-4].split("_")
    dim_value = run_params[2]
    instances = run_params[3]

    df = pd.read_csv(os.path.join(directory, filename))
    # Offset of every sample from the first sample of the run.
    df['sec_start'] = df['timestamp'] - df['timestamp'].iloc[0]
    # Drop the warm-up phase so that it does not influence the trend.
    regress = df.loc[df['sec_start'] >= warmup_sec]

    # Columns are selected by position: 2 is the regressor, 3 the target.
    # NOTE(review): presumably 'timestamp' and 'value' -- confirm against the
    # CSV layout before switching to column names.
    X = regress.iloc[:, 2].values.reshape(-1, 1)  # single-column 2-D array
    Y = regress.iloc[:, 3].values.reshape(-1, 1)
    linear_regressor = LinearRegression()
    linear_regressor.fit(X, Y)
    # With a 2-D target, coef_ is 2-D; [0][0] is the single slope.
    trend_slope = linear_regressor.coef_[0][0]

    raw_runs.append({
        'dim_value': int(dim_value),
        'instances': int(instances),
        'trend_slope': trend_slope,
    })
lags = pd.DataFrame(raw_runs)
```
%% Cell type:code id: tags:
```
# Preview the first rows of the per-run trend table.
lags.head()
```
%% Cell type:code id: tags:
```
# For every run, average the 'value' column of the "input" topic's samples
# taken after the warm-up phase; one row per run is collected.
raw_partitions = []
filenames = [
    filename
    for filename in os.listdir(directory)
    if filename.startswith(f"exp{exp_id}") and filename.endswith("partitions.csv")
]
for filename in filenames:
    # The file name (minus its 4-character extension) is "_"-separated;
    # fields 2 and 3 are converted with int() when the row is built.
    run_params = filename[:-4].split("_")
    dim_value = run_params[2]
    instances = run_params[3]

    df = pd.read_csv(os.path.join(directory, filename))
    # Offset of every sample from the first sample of the run.
    df['sec_start'] = df['timestamp'] - df['timestamp'].iloc[0]
    # NOTE(review): warmup_sec is used here although the configuration also
    # defines warmup_partitions_sec -- confirm which one is intended.
    after_warmup = df.loc[df['sec_start'] >= warmup_sec]
    # Keep only the "input" topic. An ordering comparison (>=) would also let
    # through every topic name that sorts after "input".
    input_topic = after_warmup.loc[after_warmup['topic'] == 'input']
    mean = input_topic['value'].mean()

    raw_partitions.append({
        'dim_value': int(dim_value),
        'instances': int(instances),
        'partitions': mean,
    })
partitions = pd.DataFrame(raw_partitions)
# Disabled: merge the partition means into the run table.
# runs = lags.join(partitions.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])
```
%% Cell type:code id: tags:
```
# For every run, average the observed instance count after the warm-up phase;
# one row per run is collected. Runs with an empty CSV are skipped.
raw_obs_instances = []
filenames = [
    filename
    for filename in os.listdir(directory)
    if filename.startswith(f"exp{exp_id}") and filename.endswith("instances.csv")
]
for filename in filenames:
    # The file name (minus its 4-character extension) is "_"-separated;
    # fields 2 and 3 are converted with int() when the row is built.
    run_params = filename[:-4].split("_")
    dim_value = run_params[2]
    instances = run_params[3]

    df = pd.read_csv(os.path.join(directory, filename))
    if df.empty:
        # No samples for this run; the offset below would fail on row 0.
        continue
    # Offset of every sample from the first sample of the run.
    df['sec_start'] = df['timestamp'] - df['timestamp'].iloc[0]
    after_warmup = df.loc[df['sec_start'] >= warmup_sec]
    # Build the row from this file, so that no row left over from another
    # cell is appended.
    # NOTE(review): assumes the file has a 'value' column holding the observed
    # instance count -- confirm against the CSV layout.
    mean = after_warmup['value'].mean()

    raw_obs_instances.append({
        'dim_value': int(dim_value),
        'instances': int(instances),
        'obs_instances': mean,
    })
obs_instances = pd.DataFrame(raw_obs_instances)
obs_instances.head()
```
%% Cell type:code id: tags:
```
# The run table is currently just the lag-trend table. `runs` is the same
# object as `lags`, so columns added to `runs` later also appear on `lags`.
runs = lags
# Disabled: join the partition / observed-instance means and flag runs whose
# observed instance count deviates by more than 10% from the requested one.
# runs = lags.join(partitions.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])#.join(obs_instances.set_index(['dim_value', 'instances']), on=['dim_value', 'instances'])
# runs["failed"] = runs.apply(lambda row: (abs(row['instances'] - row['obs_instances']) / row['instances']) > 0.1, axis=1)
# runs.loc[runs['failed']==True]
```
%% Cell type:code id: tags:
```
# threshold = 1000
# Mark a run as suitable when the slope of its lag trend line is less than
# `threshold`. The comparison is vectorized over the whole column.
runs["suitable"] = runs["trend_slope"] < threshold
# Remove stray whitespace from the column labels.
runs.columns = runs.columns.str.strip()
# Display a sorted view; the result is not assigned, so `runs` keeps its order.
runs.sort_values(by=["dim_value", "instances"])
```
%% Cell type:code id: tags:
```
# For every dim_value, find the smallest instance count among suitable runs.
filtered = runs[runs['suitable']]  # boolean mask: keep suitable runs only
grouped = filtered.groupby(['dim_value'])['instances'].min()
# Turn the grouped Series back into a two-column frame (dim_value, instances).
min_suitable_instances = grouped.to_frame().reset_index()
min_suitable_instances
```
%% Cell type:code id: tags:
```
# Write the table into directory_out without the index column. The file name
# is joined directly onto directory_out; no further relative path component
# is added, so the file cannot end up outside that directory.
min_suitable_instances.to_csv(
    os.path.join(directory_out, f'exp{exp_id}_min-suitable-instances.csv'),
    index=False,
)
```
%% Cell type:code id: tags:
```
# Plot the minimal suitable instance count against the dimension value.
min_suitable_instances.plot(kind='line', x='dim_value', y='instances')
# Variant with a logarithmic y axis:
# min_suitable_instances.plot(kind='line',x='dim_value',y='instances', logy=True)
plt.show()
```
%% Cell type:code id: tags:
```
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment