Skip to content
Snippets Groups Projects
Commit aabe2ce1 authored by Sören Henning's avatar Sören Henning
Browse files

Merge branch '250-add-dropped-records-slo-correct-branch' into 'master'

Add new SLO checker for the metric of skipped/dropped-records

Closes #250

See merge request !185
parents c548f277 c4c34a35
No related branches found
No related tags found
1 merge request!185Add new SLO checker for the metric of skipped/dropped-records
Pipeline #5093 passed
Showing
with 568 additions and 13 deletions
......@@ -319,7 +319,18 @@ test-slo-checker-lag-trend:
tags:
- exec-docker
script:
- cd slope-evaluator
- cd slo-checker/record-lag
- pip install -r requirements.txt
- cd app
- python -m unittest
test-slo-checker-dropped-records-kstreams:
stage: test
image: python:3.7-slim
tags:
- exec-docker
script:
- cd slo-checker/dropped-records
- pip install -r requirements.txt
- cd app
- python -m unittest
......@@ -332,7 +343,7 @@ deploy-slo-checker-lag-trend:
- test-slo-checker-lag-trend
script:
- DOCKER_TAG_NAME=$(echo $CI_COMMIT_REF_SLUG- | sed 's/^master-$//')
- docker build --pull -t theodolite-slo-checker-lag-trend slope-evaluator
- docker build --pull -t theodolite-slo-checker-lag-trend slo-checker/record-lag
- "[ ! $CI_COMMIT_TAG ] && docker tag theodolite-slo-checker-lag-trend $CR_HOST/$CR_ORG/theodolite-slo-checker-lag-trend:${DOCKER_TAG_NAME}latest"
- "[ $CI_COMMIT_TAG ] && docker tag theodolite-slo-checker-lag-trend $CR_HOST/$CR_ORG/theodolite-slo-checker-lag-trend:$CI_COMMIT_TAG"
- echo $CR_PW | docker login $CR_HOST -u $CR_USER --password-stdin
......@@ -342,7 +353,32 @@ deploy-slo-checker-lag-trend:
- if: "$CR_HOST && $CR_ORG && $CR_USER && $CR_PW && $CI_COMMIT_TAG"
when: always
- changes:
- slope-evaluator/**/*
- slo-checker/record-lag/**/*
if: "$CR_HOST && $CR_ORG && $CR_USER && $CR_PW"
when: always
- if: "$CR_HOST && $CR_ORG && $CR_USER && $CR_PW"
when: manual
allow_failure: true
deploy-slo-checker-dropped-records-kstreams:
stage: deploy
extends:
- .dind
needs:
- test-slo-checker-dropped-records-kstreams
script:
- DOCKER_TAG_NAME=$(echo $CI_COMMIT_REF_SLUG- | sed 's/^master-$//')
- docker build --pull -t theodolite-slo-checker-dropped-records-kstreams slo-checker/dropped-records
- "[ ! $CI_COMMIT_TAG ] && docker tag theodolite-slo-checker-dropped-records-kstreams $CR_HOST/$CR_ORG/theodolite-slo-checker-dropped-records-kstreams:${DOCKER_TAG_NAME}latest"
- "[ $CI_COMMIT_TAG ] && docker tag theodolite-slo-checker-dropped-records-kstreams $CR_HOST/$CR_ORG/theodolite-slo-checker-dropped-records-kstreams:$CI_COMMIT_TAG"
- echo $CR_PW | docker login $CR_HOST -u $CR_USER --password-stdin
- docker push $CR_HOST/$CR_ORG/theodolite-slo-checker-dropped-records-kstreams
- docker logout
rules:
- if: "$CR_HOST && $CR_ORG && $CR_USER && $CR_PW && $CI_COMMIT_TAG"
when: always
- changes:
- slo-checker/dropped-records/**/*
if: "$CR_HOST && $CR_ORG && $CR_USER && $CR_PW"
when: always
- if: "$CR_HOST && $CR_ORG && $CR_USER && $CR_PW"
......
......@@ -64,6 +64,17 @@ spec:
- name: LOG_LEVEL
value: INFO
{{- end }}
{{- if .Values.operator.sloChecker.droppedRecordsKStreams.enabled }}
- name: slo-checker-dropped-records-kstreams
image: "{{ .Values.operator.sloChecker.droppedRecordsKStreams.image }}:{{ .Values.operator.sloChecker.droppedRecordsKStreams.imageTag }}"
imagePullPolicy: "{{ .Values.operator.sloChecker.droppedRecordsKStreams.imagePullPolicy }}"
ports:
- containerPort: 80
name: analysis
env:
- name: LOG_LEVEL
value: INFO
{{- end }}
{{- if and .Values.operator.resultsVolume.enabled .Values.operator.resultsVolume.accessSidecar.enabled }}
- name: results-access
image: busybox:stable
......
......@@ -256,6 +256,11 @@ operator:
image: ghcr.io/cau-se/theodolite-slo-checker-lag-trend
imageTag: latest
imagePullPolicy: Always
droppedRecordsKStreams:
enabled: true
image: ghcr.io/cau-se/theodolite-slo-checker-dropped-records-kstreams
imageTag: latest
imagePullPolicy: Always
resultsVolume:
enabled: true
......
File moved
# Kafka Streams Dropped Record SLO Evaluator
## Execution
For development:
```sh
uvicorn main:app --reload --port 81 # run this command inside the app/ folder
```
## Build the docker image:
```sh
docker build . -t theodolite-evaluator
```
Run the Docker image:
```sh
docker run -p 80:81 theodolite-evaluator
```
## Configuration
You can set the `HOST` and the `PORT` (and a lot of more parameters) via environment variables. Default is `0.0.0.0:80`.
For more information see the [Gunicorn/FastAPI Docker docs](https://github.com/tiangolo/uvicorn-gunicorn-fastapi-docker#advanced-usage).
## API Documentation
The running webserver provides a REST API with the following route:
* /dropped-records
* Method: POST
* Body:
* results
* metric-metadata
* values
* metadata
* threshold
* warmup
The body of the request must be a JSON string that satisfies the following conditions:
* **dropped records**: This property is based on the [Range Vector type](https://www.prometheus.io/docs/prometheus/latest/querying/api/#range-vectors) from Prometheus and must have the following JSON *structure*:
```json
{
"results": [
[
{
"metric": {
"<label-name>": "<label-value>"
},
"values": [
[
<unix_timestamp>, // 1.634624989695E9
"<sample_value>" // integer
]
]
}
]
],
"metadata": {
"threshold": 2000000,
"warmup": 60
}
}
```
### description
* results:
* metric-metadata:
* Labels of this metric. The `dropped-records` slo checker does not use labels in the calculation of the service level objective.
* results
* The `<unix_timestamp>` provided as the first element of each element in the "values" array must be the timestamp of the measurement value in seconds (with optional decimal precision)
* The `<sample_value>` must be the measurement value as string.
* metadata: Metadata required for the calculation of the service level objective.
* **threshold**: Must be an unsigned integer that specifies the threshold for the SLO evaluation. The SLO is considered fulfilled, if the result value is below the threshold. If the result value is equal or above the threshold, the SLO is considered not fulfilled.
* **warmup**: Specifies the warmup time in seconds that is ignored when evaluating the SLO.
from fastapi import FastAPI,Request
import logging
import os
import json
import sys
# FastAPI application exposing the dropped-records SLO checker REST API.
app = FastAPI()

# Log to stdout so container runtimes (Docker/Kubernetes) capture the output.
logging.basicConfig(stream=sys.stdout,
                    format="%(asctime)s %(levelname)s %(name)s: %(message)s")
logger = logging.getLogger("API")

# Configure verbosity from the LOG_LEVEL environment variable (set via the
# Helm chart's container spec). Unset or unrecognized values leave the
# logging module's default level (WARNING) in effect.
if os.getenv('LOG_LEVEL') == 'INFO':
    logger.setLevel(logging.INFO)
elif os.getenv('LOG_LEVEL') == 'WARNING':
    logger.setLevel(logging.WARNING)
elif os.getenv('LOG_LEVEL') == 'DEBUG':
    logger.setLevel(logging.DEBUG)
def check_service_level_objective(results, threshold):
    """Return True iff the largest observed value stays strictly below the threshold."""
    highest = max(results)
    return highest < threshold
@app.post("/dropped-records", response_model=bool)
async def evaluate_slope(request: Request):
    """Evaluate the dropped-records SLO for a Prometheus range-vector result.

    The request body must be a JSON document with a 'results' list of
    Prometheus range vectors and a 'metadata' object providing 'threshold'
    (unsigned integer) and 'warmup' (seconds). Returns True iff the maximum
    number of dropped records observed after the warmup period is strictly
    below the threshold.
    """
    data = json.loads(await request.body())
    # End of the warmup period: timestamp of the first sample plus the
    # warmup offset in seconds.
    warmup_end = int(data['results'][0][0]['values'][0][0]) + int(data['metadata']['warmup'])
    # Map samples inside the warmup period to 0 so they can never violate
    # the threshold. The original wrote `int(val[0]>=warmup)`, which cast
    # the boolean comparison to int; the comparison itself is the intended
    # condition, so spell it out directly.
    results = [int(val[1]) if val[0] >= warmup_end else 0
               for result in data['results']
               for r in result
               for val in r['values']]
    return check_service_level_objective(results=results, threshold=data['metadata']["threshold"])
logger.info("SLO evaluator is online")
\ No newline at end of file
import unittest
from main import app, check_service_level_objective
import numpy as np
import json
from fastapi.testclient import TestClient
class TestSloEvaluation(unittest.TestCase):
    """Tests for the dropped-records SLO checker REST API and its helper."""

    client = TestClient(app)

    def test_1_rep(self):
        # POST a recorded Prometheus response and expect the SLO to hold.
        with open('../resources/test-1-rep-success.json') as json_file:
            data = json.load(json_file)
        response = self.client.post("/dropped-records", json=data)
        # assertEquals is a deprecated alias; use assertEqual.
        self.assertEqual(response.json(), True)

    def test_check_service_level_objective(self):
        # Avoid shadowing the builtin `list` with a local name.
        values = list(range(-100, 100))
        self.assertEqual(check_service_level_objective(values, 90), False)
        self.assertEqual(check_service_level_objective(values, 110), True)


if __name__ == '__main__':
    unittest.main()
\ No newline at end of file
{
"results": [
[
{
"metric": {
"job": "titan-ccp-aggregation"
},
"values": [
[
1.634624674695E9,
"0"
],
[
1.634624679695E9,
"0"
],
[
1.634624684695E9,
"0"
],
[
1.634624689695E9,
"0"
],
[
1.634624694695E9,
"0"
],
[
1.634624699695E9,
"0"
],
[
1.634624704695E9,
"0"
],
[
1.634624709695E9,
"0"
],
[
1.634624714695E9,
"0"
],
[
1.634624719695E9,
"0"
],
[
1.634624724695E9,
"0"
],
[
1.634624729695E9,
"0"
],
[
1.634624734695E9,
"0"
],
[
1.634624739695E9,
"0"
],
[
1.634624744695E9,
"1"
],
[
1.634624749695E9,
"3"
],
[
1.634624754695E9,
"4"
],
[
1.634624759695E9,
"4"
],
[
1.634624764695E9,
"4"
],
[
1.634624769695E9,
"4"
],
[
1.634624774695E9,
"4"
],
[
1.634624779695E9,
"4"
],
[
1.634624784695E9,
"4"
],
[
1.634624789695E9,
"4"
],
[
1.634624794695E9,
"4"
],
[
1.634624799695E9,
"4"
],
[
1.634624804695E9,
"176"
],
[
1.634624809695E9,
"176"
],
[
1.634624814695E9,
"176"
],
[
1.634624819695E9,
"176"
],
[
1.634624824695E9,
"176"
],
[
1.634624829695E9,
"159524"
],
[
1.634624834695E9,
"209870"
],
[
1.634624839695E9,
"278597"
],
[
1.634624844695E9,
"460761"
],
[
1.634624849695E9,
"460761"
],
[
1.634624854695E9,
"460761"
],
[
1.634624859695E9,
"460761"
],
[
1.634624864695E9,
"460761"
],
[
1.634624869695E9,
"606893"
],
[
1.634624874695E9,
"653534"
],
[
1.634624879695E9,
"755796"
],
[
1.634624884695E9,
"919317"
],
[
1.634624889695E9,
"919317"
],
[
1.634624894695E9,
"955926"
],
[
1.634624899695E9,
"955926"
],
[
1.634624904695E9,
"955926"
],
[
1.634624909695E9,
"955926"
],
[
1.634624914695E9,
"955926"
],
[
1.634624919695E9,
"1036530"
],
[
1.634624924695E9,
"1078477"
],
[
1.634624929695E9,
"1194775"
],
[
1.634624934695E9,
"1347755"
],
[
1.634624939695E9,
"1352151"
],
[
1.634624944695E9,
"1360428"
],
[
1.634624949695E9,
"1360428"
],
[
1.634624954695E9,
"1360428"
],
[
1.634624959695E9,
"1360428"
],
[
1.634624964695E9,
"1360428"
],
[
1.634624969695E9,
"1525685"
],
[
1.634624974695E9,
"1689296"
],
[
1.634624979695E9,
"1771358"
],
[
1.634624984695E9,
"1854284"
],
[
1.634624989695E9,
"1854284"
]
]
}
]
],
"metadata": {
"threshold": 2000000,
"warmup": 60
}
}
\ No newline at end of file
# Base image bundles Gunicorn/Uvicorn workers for serving a FastAPI app
# from /app on port 80.
FROM tiangolo/uvicorn-gunicorn-fastapi:python3.7

# Install Python dependencies first so this layer is cached independently
# of application-code changes.
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt

# Copy the application; the base image's entrypoint serves it from /app.
COPY ./app /app
......@@ -25,37 +25,56 @@ docker run -p 80:80 theodolite-evaluator
You can set the `HOST` and the `PORT` (and a lot of more parameters) via environment variables. Default is `0.0.0.0:80`.
For more information see the [Gunicorn/FastAPI Docker docs](https://github.com/tiangolo/uvicorn-gunicorn-fastapi-docker#advanced-usage).
## API Documentation
# API Documentation
The running webserver provides a REST API with the following route:
* /evaluate-slope
* Method: POST
* Body:
* total_lags
* threshold
* warmup
* /dropped-records
* Method: POST
* Body:
* results
* metric-metadata
* values
* metadata
* threshold
* warmup
The body of the request must be a JSON string that satisfies the following conditions:
* **total_lag**: This property is based on the [Range Vector type](https://www.prometheus.io/docs/prometheus/latest/querying/api/#range-vectors) from Prometheus and must have the following JSON structure:
```
{
* **total_lag**: This property is based on the [Range Vector type](https://www.prometheus.io/docs/prometheus/latest/querying/api/#range-vectors) from Prometheus and must have the following JSON *structure*:
```json
{
"results": [
[
"metric": {
"group": "<label_value>"
},
"values": [
[
<unix_timestamp>,
"<sample_value>"
{
"metric": {
"<label-name>": "<label-value>"
},
"values": [
[
<unix_timestamp>, // 1.634624989695E9
"<sample_value>" // integer
]
]
]
}
]
],
"metadata": {
"threshold": 2000000,
"warmup": 60
}
}
```
* The `<label_value>` provided in "metric.group" must be equal to the id of the Kafka consumer group.
### description
* results:
* metric-metadata:
* Labels of this metric. The `dropped-records` slo checker does not use labels in the calculation of the service level objective.
* results
* The `<unix_timestamp>` provided as the first element of each element in the "values" array must be the timestamp of the measurement value in seconds (with optional decimal precision)
* The `<sample_value>` must be the measurement value as string.
* **threshold**: Must be an unsigned integer that specifies the threshold for the SLO evaluation. The SLO is considered fulfilled, if the result value is below the threshold. If the result value is equal or above the threshold, the SLO is considered not fulfilled.
* **warmup**: Specifieds the warmup time in seconds that are ignored for evaluating the SLO.
\ No newline at end of file
* metadata: Metadata required for the calculation of the service level objective.
* **threshold**: Must be an unsigned integer that specifies the threshold for the SLO evaluation. The SLO is considered fulfilled, if the result value is below the threshold. If the result value is equal or above the threshold, the SLO is considered not fulfilled.
* **warmup**: Specifies the warmup time in seconds that is ignored when evaluating the SLO.
......@@ -38,7 +38,7 @@ def calculate_slope_trend(results, warmup):
err_msg = 'Computing trend slope failed.'
logger.exception(err_msg)
logger.error('Mark this subexperiment as not successful and continue benchmark.')
return False
return float('inf')
logger.info("Computed lag trend slope is '%s'", trend_slope)
return trend_slope
......@@ -49,7 +49,7 @@ def check_service_level_objective(results, threshold):
@app.post("/evaluate-slope",response_model=bool)
async def evaluate_slope(request: Request):
data = json.loads(await request.body())
results = [calculate_slope_trend(total_lag, data['warmup']) for total_lag in data['total_lags']]
return check_service_level_objective(results=results, threshold=data["threshold"])
results = [calculate_slope_trend(total_lag, data['metadata']['warmup']) for total_lag in data['results']]
return check_service_level_objective(results=results, threshold=data['metadata']["threshold"])
logger.info("SLO evaluator is online")
\ No newline at end of file
......@@ -17,7 +17,7 @@ class TestSloEvaluation(unittest.TestCase):
data = json.load(json_file)
response = self.client.post("/evaluate-slope", json=data)
self.assertEquals(response.json(), True)
def test_check_service_level_objective(self):
list = [1,2,3,4]
self.assertEquals(check_service_level_objective(list, 2), False)
......
fastapi==0.65.2
scikit-learn==0.20.3
pandas==1.0.3
uvicorn
requests
{
"total_lags": [
"results": [
[
{
"metric": {
......@@ -134,6 +134,8 @@
}
]
],
"threshold": 2000,
"warmup": 0
"metadata": {
"threshold": 2000,
"warmup": 0
}
}
\ No newline at end of file
{
"total_lags": [
"results": [
[
{
"metric": {
......@@ -284,6 +284,8 @@
}
]
],
"threshold": 2000,
"warmup": 0
"metadata": {
"threshold": 2000,
"warmup": 0
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment