Merge remote-tracking branch 'upstream/master' into 109-implement-kotlin-prototype

1948862e · Simon Ehrenstein · 434b753f · 605510cb · 1948862e · 1948862e
Commit 1948862e authored 4 years ago by Simon Ehrenstein
--- a/execution/infrastructure/kafka-lag-exporter/values.yaml
+++ b/execution/infrastructure/kafka-lag-exporter/values.yaml
+image:
+  pullPolicy: IfNotPresent
+
 clusters:
  - name: "my-confluent-cp-kafka"
    bootstrapBrokers: "my-confluent-cp-kafka:9092"

--- a/execution/infrastructure/kafka/values.yaml
+++ b/execution/infrastructure/kafka/values.yaml
@@ -55,6 +55,7 @@ cp-kafka:
    # "min.insync.replicas": 2
    "auto.create.topics.enable": false
    "log.retention.ms": "10000" # 10s
+    #"log.retention.ms": "86400000" # 24h
    "metrics.sample.window.ms": "5000" #5s

 ## ------------------------------------------------------

--- a/execution/infrastructure/kubernetes/volumeSingle.yaml
+++ b/execution/infrastructure/kubernetes/volumeSingle.yaml
--- a/execution/infrastructure/kubernetes/volumeCluster.yaml
+++ b/execution/infrastructure/kubernetes/volumeCluster.yaml
--- a/execution/lib/cli_parser.py
+++ b/execution/lib/cli_parser.py
@@ -136,6 +136,11 @@ def benchmark_parser(description):
                        metavar='<strategy>',
                        default=os.environ.get('SEARCH_STRATEGY', 'default'),
                        help='The benchmarking search strategy. Can be set to default, linear-search or binary-search')
+    parser.add_argument('--threshold',
+                        type=int,
+                        metavar='<threshold>',
+                        default=os.environ.get('THRESHOLD', 2000),
+                        help='The threshold for the trend slope that the search strategies use to determine that a load could be handled')
    return parser



--- a/execution/lib/trend_slope_computer.py
+++ b/execution/lib/trend_slope_computer.py
@@ -2,7 +2,7 @@ from sklearn.linear_model import LinearRegression
 import pandas as pd
 import os

-def compute(directory, filename, warmup_sec, threshold):
+def compute(directory, filename, warmup_sec):
    df = pd.read_csv(os.path.join(directory, filename))
    input = df
    input['sec_start'] = input.loc[0:, 'timestamp'] - input.iloc[0]['timestamp']
@@ -16,4 +16,4 @@ def compute(directory, filename, warmup_sec, threshold):

    trend_slope = linear_regressor.coef_[0][0]

-    return trend_slope
\ No newline at end of file
+    return trend_slope
--- a/execution/run_uc.py
+++ b/execution/run_uc.py
@@ -282,8 +282,16 @@ def run_evaluation(exp_id, uc_id, dim_value, instances, execution_minutes, prome
    :param int execution_minutes: How long the use case where executed.
    """
    print('Run evaluation function')
-    lag_analysis.main(exp_id, f'uc{uc_id}', dim_value, instances,
-                      execution_minutes, prometheus_base_url, result_path)
+    try:
+        lag_analysis.main(exp_id, f'uc{uc_id}', dim_value, instances,
+                          execution_minutes, prometheus_base_url,
+                          result_path)
+    except Exception as e:
+        err_msg = 'Evaluation function failed'
+        print(err_msg)
+        logging.exception(err_msg)
+        print('Benchmark execution continues')
+
    return



--- a/execution/strategies/config.py
+++ b/execution/strategies/config.py
@@ -18,5 +18,6 @@ class ExperimentConfig:
    configurations: dict
    domain_restriction_strategy: object
    search_strategy: object
+    threshold: int
    subexperiment_executor: object
    subexperiment_evaluator: object
--- a/execution/strategies/strategies/search/binary_search_strategy.py
+++ b/execution/strategies/strategies/search/binary_search_strategy.py
@@ -7,8 +7,9 @@ def binary_search(config, dim_value, lower, upper, subexperiment_counter):
        print(f"Run subexperiment {subexperiment_counter} with config {dim_value} {config.replicass[lower]}")
        subexperiment_config = SubexperimentConfig(config.use_case, config.exp_id, subexperiment_counter, dim_value, config.replicass[lower], config.partitions, config.cpu_limit, config.memory_limit, config.execution_minutes, config.prometheus_base_url, config.reset, config.namespace, config.result_path, config.configurations)
        config.subexperiment_executor.execute(subexperiment_config)
-        result = config.subexperiment_evaluator.execute(subexperiment_config)
-        if result==1: # successful, the upper neighbor is assumed to also has been successful
+        success = config.subexperiment_evaluator.execute(subexperiment_config,
+                                                         config.threshold)
+        if success: # successful, the upper neighbor is assumed to have been successful as well
            return (lower, subexperiment_counter+1)
        else: # not successful
            return (lower+1, subexperiment_counter)
@@ -16,15 +17,17 @@ def binary_search(config, dim_value, lower, upper, subexperiment_counter):
        print(f"Run subexperiment {subexperiment_counter} with config {dim_value} {config.replicass[lower]}")
        subexperiment_config = SubexperimentConfig(config.use_case, config.exp_id, subexperiment_counter, dim_value, config.replicass[lower], config.partitions, config.cpu_limit, config.memory_limit, config.execution_minutes, config.prometheus_base_url, config.reset, config.namespace, config.result_path, config.configurations)
        config.subexperiment_executor.execute(subexperiment_config)
-        result = config.subexperiment_evaluator.execute(subexperiment_config)
-        if result==1: # minimal instances found
+        success = config.subexperiment_evaluator.execute(subexperiment_config,
+                                                         config.threshold)
+        if success: # minimal instances found
            return (lower, subexperiment_counter)
        else: # not successful, check if lower+1 instances are sufficient
            print(f"Run subexperiment {subexperiment_counter} with config {dim_value} {config.replicass[upper]}")
            subexperiment_config = SubexperimentConfig(config.use_case, config.exp_id, subexperiment_counter, dim_value, config.replicass[upper], config.partitions, config.cpu_limit, config.memory_limit, config.execution_minutes, config.prometheus_base_url, config.reset, config.namespace, config.result_path, config.configurations)
            config.subexperiment_executor.execute(subexperiment_config)
-            result = config.subexperiment_evaluator.execute(subexperiment_config)
-            if result == 1: # minimal instances found
+            success = config.subexperiment_evaluator.execute(subexperiment_config,
+                                                             config.threshold)
+            if success: # minimal instances found
                return (upper, subexperiment_counter)
            else:
                return (upper+1, subexperiment_counter)
@@ -34,8 +37,9 @@ def binary_search(config, dim_value, lower, upper, subexperiment_counter):
        print(f"Run subexperiment {subexperiment_counter} with config {dim_value} {config.replicass[mid]}")
        subexperiment_config = SubexperimentConfig(config.use_case, config.exp_id, subexperiment_counter, dim_value, config.replicass[mid], config.partitions, config.cpu_limit, config.memory_limit, config.execution_minutes, config.prometheus_base_url, config.reset, config.namespace, config.result_path, config.configurations)
        config.subexperiment_executor.execute(subexperiment_config)
-        result = config.subexperiment_evaluator.execute(subexperiment_config)
-        if result == 1: # success -> search in (lower, mid-1)
+        success = config.subexperiment_evaluator.execute(subexperiment_config,
+                                                         config.threshold)
+        if success: # success -> search in (lower, mid-1)
            return binary_search(config, dim_value, lower, mid-1, subexperiment_counter+1)
        else: # not success -> search in (mid+1, upper)
            return binary_search(config, dim_value, mid+1, upper, subexperiment_counter+1)

--- a/execution/strategies/strategies/search/check_all_strategy.py
+++ b/execution/strategies/strategies/search/check_all_strategy.py
@@ -2,23 +2,30 @@
 import os
 from strategies.strategies.config import SubexperimentConfig

+
 def execute(config, dim_value_index, lower_replicas_bound_index, subexperiment_counter):
-    new_lower_replicas_bound_index=lower_replicas_bound_index
-    new_lower_replicas_bound_found=False
-    subexperiments_total=len(config.dim_values)*len(config.replicass)
+    new_lower_replicas_bound_index = lower_replicas_bound_index
+    new_lower_replicas_bound_found = False
+    subexperiments_total = len(config.dim_values) * len(config.replicass)
    while lower_replicas_bound_index < len(config.replicass):
-        subexperiment_counter+=1
-        dim_value=config.dim_values[dim_value_index]
-        replicas=config.replicass[lower_replicas_bound_index]
-        print(f"Run subexperiment {subexperiment_counter} of {subexperiments_total} with dimension value {dim_value} and {replicas} replicas.")
+        subexperiment_counter += 1
+        dim_value = config.dim_values[dim_value_index]
+        replicas = config.replicass[lower_replicas_bound_index]
+        print(
+            f"Run subexperiment {subexperiment_counter} of {subexperiments_total} with dimension value {dim_value} and {replicas} replicas.")

-        subexperiment_config = SubexperimentConfig(config.use_case, config.exp_id, subexperiment_counter, dim_value, replicas, config.partitions, config.cpu_limit, config.memory_limit, config.execution_minutes, config.prometheus_base_url, config.reset, config.namespace, config.result_path, config.configurations)
+        subexperiment_config = SubexperimentConfig(
+            config.use_case, config.exp_id, subexperiment_counter, dim_value,
+            replicas, config.partitions, config.cpu_limit, config.memory_limit,
+            config.execution_minutes, config.prometheus_base_url, config.reset,
+            config.namespace, config.result_path, config.configurations)

        config.subexperiment_executor.execute(subexperiment_config)

-        result = config.subexperiment_evaluator.execute(subexperiment_config) == 1
-        if result == 1 and not new_lower_replicas_bound_found:
+        success = config.subexperiment_evaluator.execute(subexperiment_config,
+                                                         config.threshold)
+        if success and not new_lower_replicas_bound_found:
            new_lower_replicas_bound_found = True
            new_lower_replicas_bound_index = lower_replicas_bound_index
-        lower_replicas_bound_index+=1
+        lower_replicas_bound_index += 1
    return (new_lower_replicas_bound_index, subexperiment_counter)
--- a/execution/strategies/strategies/search/linear_search_strategy.py
+++ b/execution/strategies/strategies/search/linear_search_strategy.py
@@ -14,8 +14,9 @@ def execute(config, dim_value_index, lower_replicas_bound_index, subexperiment_c
        subexperiment_config = SubexperimentConfig(config.use_case, config.exp_id, subexperiment_counter, dim_value, replicas, config.partitions, config.cpu_limit, config.memory_limit, config.execution_minutes, config.prometheus_base_url, config.reset, config.namespace, config.result_path, config.configurations)

        config.subexperiment_executor.execute(subexperiment_config)
-        result = config.subexperiment_evaluator.execute(subexperiment_config)
-        if result == 1:
+        success = config.subexperiment_evaluator.execute(subexperiment_config,
+                                                         config.threshold)
+        if success:
            return (lower_replicas_bound_index, subexperiment_counter)
        else:
            lower_replicas_bound_index+=1

--- a/execution/strategies/subexperiment_evaluation/subexperiment_evaluator.py
+++ b/execution/strategies/subexperiment_evaluation/subexperiment_evaluator.py
-import os
-import sys
-import os
 import lib.trend_slope_computer as trend_slope_computer
+import logging
+import os

-THRESHOLD = 2000
 WARMUP_SEC = 60

-def execute(config):
+def execute(config, threshold):
+    """
+    Check whether the trend slope of the subexperiment's total lag is below
+    the threshold.
+
+    :param config: Configuration of the subexperiment.
+    :param threshold: The threshold the trend slope must be below.
+    """
    cwd = f'{os.getcwd()}/{config.result_path}'
    file = f"exp{config.exp_id}_uc{config.use_case}_{config.dim_value}_{config.replicas}_totallag.csv"

-    trend_slope = trend_slope_computer.compute(cwd, file, WARMUP_SEC, THRESHOLD)
+    try:
+        trend_slope = trend_slope_computer.compute(cwd, file, WARMUP_SEC)
+    except Exception as e:
+        err_msg = 'Computing trend slope failed'
+        print(err_msg)
+        logging.exception(err_msg)
+        print('Mark this subexperiment as not successful and continue benchmark')
+        return False

    print(f"Trend Slope: {trend_slope}")
-    success = 0 if trend_slope > THRESHOLD else 1
-    return success
+
+    return trend_slope < threshold
--- a/execution/theodolite.py
+++ b/execution/theodolite.py
@@ -31,8 +31,8 @@ def load_variables():


 def main(uc, loads, instances_list, partitions, cpu_limit, memory_limit,
-         duration, domain_restriction, search_strategy, prometheus_base_url,
-         reset, namespace, result_path, configurations):
+         duration, domain_restriction, search_strategy, threshold,
+         prometheus_base_url, reset, namespace, result_path, configurations):

    print(
        f"Domain restriction of search space activated: {domain_restriction}")
@@ -107,6 +107,7 @@ def main(uc, loads, instances_list, partitions, cpu_limit, memory_limit,
        result_path=result_path,
        domain_restriction_strategy=domain_restriction_strategy,
        search_strategy=search_strategy_method,
+        threshold=threshold,
        subexperiment_executor=subexperiment_executor,
        subexperiment_evaluator=subexperiment_evaluator)

@@ -119,10 +120,11 @@ if __name__ == '__main__':
    args = load_variables()
    if args.reset_only:
        print('Only reset the cluster')
-        run_uc.main(None, None, None, None, None, None, None, None,
-                    None, None, args.namespace, None, None, reset_only=True)
+        run_uc.main(None, None, None, None, None, None, None, None, None,
+                    None, args.namespace, None, None, reset_only=True)
    else:
        main(args.uc, args.loads, args.instances_list, args.partitions,
             args.cpu_limit, args.memory_limit, args.duration,
-             args.domain_restriction, args.search_strategy, args.prometheus,
-             args.reset, args.namespace, args.path, args.configurations)
+             args.domain_restriction, args.search_strategy,
+             args.threshold, args.prometheus, args.reset, args.namespace,
+             args.path, args.configurations)
--- a/execution/theodolite.yaml
+++ b/execution/theodolite.yaml
@@ -11,7 +11,7 @@ spec:
          claimName: theodolite-pv-claim
      containers:
        - name: theodolite
-          image: bvonheid/theodolite:latest
+          image: ghcr.io/cau-se/theodolite:latest
          # imagePullPolicy: Never # Used to pull "own" local image
          env:
            - name: UC # mandatory