Skip to content
Snippets Groups Projects
Commit aabe2ce1 authored by Sören Henning's avatar Sören Henning
Browse files

Merge branch '250-add-dropped-records-slo-correct-branch' into 'master'

Add new SLO checker for the metric of skipped/dropped-records

Closes #250

See merge request !185
parents c548f277 c4c34a35
No related branches found
No related tags found
1 merge request!185Add new SLO checker for the metric of skipped/dropped-records
Pipeline #5093 passed
Showing
with 568 additions and 13 deletions
......@@ -319,7 +319,18 @@ test-slo-checker-lag-trend:
tags:
- exec-docker
script:
- cd slope-evaluator
- cd slo-checker/record-lag
- pip install -r requirements.txt
- cd app
- python -m unittest
test-slo-checker-dropped-records-kstreams:
stage: test
image: python:3.7-slim
tags:
- exec-docker
script:
- cd slo-checker/dropped-records
- pip install -r requirements.txt
- cd app
- python -m unittest
......@@ -332,7 +343,7 @@ deploy-slo-checker-lag-trend:
- test-slo-checker-lag-trend
script:
- DOCKER_TAG_NAME=$(echo $CI_COMMIT_REF_SLUG- | sed 's/^master-$//')
- docker build --pull -t theodolite-slo-checker-lag-trend slope-evaluator
- docker build --pull -t theodolite-slo-checker-lag-trend slo-checker/record-lag
- "[ ! $CI_COMMIT_TAG ] && docker tag theodolite-slo-checker-lag-trend $CR_HOST/$CR_ORG/theodolite-slo-checker-lag-trend:${DOCKER_TAG_NAME}latest"
- "[ $CI_COMMIT_TAG ] && docker tag theodolite-slo-checker-lag-trend $CR_HOST/$CR_ORG/theodolite-slo-checker-lag-trend:$CI_COMMIT_TAG"
- echo $CR_PW | docker login $CR_HOST -u $CR_USER --password-stdin
......@@ -342,7 +353,32 @@ deploy-slo-checker-lag-trend:
- if: "$CR_HOST && $CR_ORG && $CR_USER && $CR_PW && $CI_COMMIT_TAG"
when: always
- changes:
- slope-evaluator/**/*
- slo-checker/record-lag/**/*
if: "$CR_HOST && $CR_ORG && $CR_USER && $CR_PW"
when: always
- if: "$CR_HOST && $CR_ORG && $CR_USER && $CR_PW"
when: manual
allow_failure: true
deploy-slo-checker-dropped-records-kstreams:
stage: deploy
extends:
- .dind
needs:
- test-slo-checker-dropped-records-kstreams
script:
- DOCKER_TAG_NAME=$(echo $CI_COMMIT_REF_SLUG- | sed 's/^master-$//')
- docker build --pull -t theodolite-slo-checker-dropped-records-kstreams slo-checker/dropped-records
- "[ ! $CI_COMMIT_TAG ] && docker tag theodolite-slo-checker-dropped-records-kstreams $CR_HOST/$CR_ORG/theodolite-slo-checker-dropped-records-kstreams:${DOCKER_TAG_NAME}latest"
- "[ $CI_COMMIT_TAG ] && docker tag theodolite-slo-checker-dropped-records-kstreams $CR_HOST/$CR_ORG/theodolite-slo-checker-dropped-records-kstreams:$CI_COMMIT_TAG"
- echo $CR_PW | docker login $CR_HOST -u $CR_USER --password-stdin
- docker push $CR_HOST/$CR_ORG/theodolite-slo-checker-dropped-records-kstreams
- docker logout
rules:
- if: "$CR_HOST && $CR_ORG && $CR_USER && $CR_PW && $CI_COMMIT_TAG"
when: always
- changes:
- slo-checker/dropped-records/**/*
if: "$CR_HOST && $CR_ORG && $CR_USER && $CR_PW"
when: always
- if: "$CR_HOST && $CR_ORG && $CR_USER && $CR_PW"
......
......@@ -64,6 +64,17 @@ spec:
- name: LOG_LEVEL
value: INFO
{{- end }}
{{- if .Values.operator.sloChecker.droppedRecordsKStreams.enabled }}
- name: slo-checker-dropped-records-kstreams
image: "{{ .Values.operator.sloChecker.droppedRecordsKStreams.image }}:{{ .Values.operator.sloChecker.droppedRecordsKStreams.imageTag }}"
imagePullPolicy: "{{ .Values.operator.sloChecker.droppedRecordsKStreams.imagePullPolicy }}"
ports:
- containerPort: 80
name: analysis
env:
- name: LOG_LEVEL
value: INFO
{{- end }}
{{- if and .Values.operator.resultsVolume.enabled .Values.operator.resultsVolume.accessSidecar.enabled }}
- name: results-access
image: busybox:stable
......
......@@ -256,6 +256,11 @@ operator:
image: ghcr.io/cau-se/theodolite-slo-checker-lag-trend
imageTag: latest
imagePullPolicy: Always
droppedRecordsKStreams:
enabled: true
image: ghcr.io/cau-se/theodolite-slo-checker-dropped-records-kstreams
imageTag: latest
imagePullPolicy: Always
resultsVolume:
enabled: true
......
File moved
# Kafka Streams Dropped Record SLO Evaluator
## Execution
For development:
```sh
uvicorn main:app --reload --port 81 # run this command inside the app/ folder
```
## Build the docker image:
```sh
docker build . -t theodolite-evaluator
```
Run the Docker image:
```sh
docker run -p 80:81 theodolite-evaluator
```
## Configuration
You can set the `HOST` and the `PORT` (and a lot of more parameters) via environment variables. Default is `0.0.0.0:80`.
For more information see the [Gunicorn/FastAPI Docker docs](https://github.com/tiangolo/uvicorn-gunicorn-fastapi-docker#advanced-usage).
## API Documentation
The running webserver provides a REST API with the following route:
* /dropped-records
* Method: POST
* Body:
* results
* metric-metadata
* values
* metadata
* threshold
* warmup
The body of the request must be a JSON string that satisfies the following conditions:
* **dropped records**: This property is based on the [Range Vector type](https://www.prometheus.io/docs/prometheus/latest/querying/api/#range-vectors) from Prometheus and must have the following JSON *structure*:
```json
{
"results": [
[
{
"metric": {
"<label-name>": "<label-value>"
},
"values": [
[
<unix_timestamp>, // 1.634624989695E9
"<sample_value>" // integer
]
]
}
]
],
"metadata": {
"threshold": 2000000,
"warmup": 60
}
}
```
### description
* results:
* metric-metadata:
* Labels of this metric. The `dropped-records` slo checker does not use labels in the calculation of the service level objective.
* results
* The `<unix_timestamp>` provided as the first element of each element in the "values" array must be the timestamp of the measurement value in seconds (with optional decimal precision)
* The `<sample_value>` must be the measurement value as string.
* metadata: Metadata required for the calculation of the service level objective.
* **threshold**: Must be an unsigned integer that specifies the threshold for the SLO evaluation. The SLO is considered fulfilled, if the result value is below the threshold. If the result value is equal or above the threshold, the SLO is considered not fulfilled.
* **warmup**: Specifies the warmup time in seconds that is ignored when evaluating the SLO.
from fastapi import FastAPI,Request
import logging
import os
import json
import sys
# FastAPI application exposing the dropped-records SLO checker REST API.
app = FastAPI()

# Log to stdout so container runtimes (Docker/Kubernetes) capture the output.
logging.basicConfig(stream=sys.stdout,
                    format="%(asctime)s %(levelname)s %(name)s: %(message)s")
logger = logging.getLogger("API")

# Configure verbosity from the LOG_LEVEL environment variable (set via the
# Helm chart's container spec). Unset or unrecognized values leave the
# logging module's default level (WARNING) in effect.
if os.getenv('LOG_LEVEL') == 'INFO':
    logger.setLevel(logging.INFO)
elif os.getenv('LOG_LEVEL') == 'WARNING':
    logger.setLevel(logging.WARNING)
elif os.getenv('LOG_LEVEL') == 'DEBUG':
    logger.setLevel(logging.DEBUG)
def check_service_level_objective(results, threshold):
    """Return True iff the largest observed value stays strictly below the threshold."""
    highest = max(results)
    return highest < threshold
@app.post("/dropped-records", response_model=bool)
async def evaluate_slope(request: Request):
    """Evaluate the dropped-records SLO for a Prometheus range-vector result.

    The request body must be a JSON document with a 'results' list of
    Prometheus range vectors and a 'metadata' object providing 'threshold'
    (unsigned integer) and 'warmup' (seconds). Returns True iff the maximum
    number of dropped records observed after the warmup period is strictly
    below the threshold.
    """
    data = json.loads(await request.body())
    # End of the warmup period: timestamp of the first sample plus the
    # warmup offset in seconds.
    warmup_end = int(data['results'][0][0]['values'][0][0]) + int(data['metadata']['warmup'])
    # Map samples inside the warmup period to 0 so they can never violate
    # the threshold. The original wrote `int(val[0]>=warmup)`, which cast
    # the boolean comparison to int; the comparison itself is the intended
    # condition, so spell it out directly.
    results = [int(val[1]) if val[0] >= warmup_end else 0
               for result in data['results']
               for r in result
               for val in r['values']]
    return check_service_level_objective(results=results, threshold=data['metadata']["threshold"])
logger.info("SLO evaluator is online")
\ No newline at end of file
import unittest
from main import app, check_service_level_objective
import numpy as np
import json
from fastapi.testclient import TestClient
class TestSloEvaluation(unittest.TestCase):
    """Tests for the dropped-records SLO checker REST API and its helper."""

    client = TestClient(app)

    def test_1_rep(self):
        # POST a recorded Prometheus response and expect the SLO to hold.
        with open('../resources/test-1-rep-success.json') as json_file:
            data = json.load(json_file)
        response = self.client.post("/dropped-records", json=data)
        # assertEquals is a deprecated alias; use assertEqual.
        self.assertEqual(response.json(), True)

    def test_check_service_level_objective(self):
        # Avoid shadowing the builtin `list` with a local name.
        values = list(range(-100, 100))
        self.assertEqual(check_service_level_objective(values, 90), False)
        self.assertEqual(check_service_level_objective(values, 110), True)


if __name__ == '__main__':
    unittest.main()
\ No newline at end of file
{
"results": [
[
{
"metric": {
"job": "titan-ccp-aggregation"
},
"values": [
[
1.634624674695E9,
"0"
],
[
1.634624679695E9,
"0"
],
[
1.634624684695E9,
"0"
],
[
1.634624689695E9,
"0"
],
[
1.634624694695E9,
"0"
],
[
1.634624699695E9,
"0"
],
[
1.634624704695E9,
"0"
],
[
1.634624709695E9,
"0"
],
[
1.634624714695E9,
"0"
],
[
1.634624719695E9,
"0"
],
[
1.634624724695E9,
"0"
],
[
1.634624729695E9,
"0"
],
[
1.634624734695E9,
"0"
],
[
1.634624739695E9,
"0"
],
[
1.634624744695E9,
"1"
],
[
1.634624749695E9,
"3"
],
[
1.634624754695E9,
"4"
],
[
1.634624759695E9,
"4"
],
[
1.634624764695E9,
"4"
],
[
1.634624769695E9,
"4"
],
[
1.634624774695E9,
"4"
],
[
1.634624779695E9,
"4"
],
[
1.634624784695E9,
"4"
],
[
1.634624789695E9,
"4"
],
[
1.634624794695E9,
"4"
],
[
1.634624799695E9,
"4"
],
[
1.634624804695E9,
"176"
],
[
1.634624809695E9,
"176"
],
[
1.634624814695E9,
"176"
],
[
1.634624819695E9,
"176"
],
[
1.634624824695E9,
"176"
],
[
1.634624829695E9,
"159524"
],
[
1.634624834695E9,
"209870"
],
[
1.634624839695E9,
"278597"
],
[
1.634624844695E9,
"460761"
],
[
1.634624849695E9,
"460761"
],
[
1.634624854695E9,
"460761"
],
[
1.634624859695E9,
"460761"
],
[
1.634624864695E9,
"460761"
],
[
1.634624869695E9,
"606893"
],
[
1.634624874695E9,
"653534"
],
[
1.634624879695E9,
"755796"
],
[
1.634624884695E9,
"919317"
],
[
1.634624889695E9,
"919317"
],
[
1.634624894695E9,
"955926"
],
[
1.634624899695E9,
"955926"
],
[
1.634624904695E9,
"955926"
],
[
1.634624909695E9,
"955926"
],
[
1.634624914695E9,
"955926"
],
[
1.634624919695E9,
"1036530"
],
[
1.634624924695E9,
"1078477"
],
[
1.634624929695E9,
"1194775"
],
[
1.634624934695E9,
"1347755"
],
[
1.634624939695E9,
"1352151"
],
[
1.634624944695E9,
"1360428"
],
[
1.634624949695E9,
"1360428"
],
[
1.634624954695E9,
"1360428"
],
[
1.634624959695E9,
"1360428"
],
[
1.634624964695E9,
"1360428"
],
[
1.634624969695E9,
"1525685"
],
[
1.634624974695E9,
"1689296"
],
[
1.634624979695E9,
"1771358"
],
[
1.634624984695E9,
"1854284"
],
[
1.634624989695E9,
"1854284"
]
]
}
]
],
"metadata": {
"threshold": 2000000,
"warmup": 60
}
}
\ No newline at end of file
# Base image bundles Gunicorn/Uvicorn workers for serving a FastAPI app
# from /app on port 80.
FROM tiangolo/uvicorn-gunicorn-fastapi:python3.7

# Install Python dependencies first so this layer is cached independently
# of application-code changes.
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt

# Copy the application; the base image's entrypoint serves it from /app.
COPY ./app /app
......@@ -25,37 +25,56 @@ docker run -p 80:80 theodolite-evaluator
You can set the `HOST` and the `PORT` (and a lot of more parameters) via environment variables. Default is `0.0.0.0:80`.
For more information see the [Gunicorn/FastAPI Docker docs](https://github.com/tiangolo/uvicorn-gunicorn-fastapi-docker#advanced-usage).
## API Documentation
# API Documentation
The running webserver provides a REST API with the following route:
* /evaluate-slope
* Method: POST
* Body:
* total_lags
* threshold
* warmup
* /dropped-records
* Method: POST
* Body:
* results
* metric-metadata
* values
* metadata
* threshold
* warmup
The body of the request must be a JSON string that satisfies the following conditions:
* **total_lag**: This property is based on the [Range Vector type](https://www.prometheus.io/docs/prometheus/latest/querying/api/#range-vectors) from Prometheus and must have the following JSON structure:
```
{
* **total_lag**: This property is based on the [Range Vector type](https://www.prometheus.io/docs/prometheus/latest/querying/api/#range-vectors) from Prometheus and must have the following JSON *structure*:
```json
{
"results": [
[
"metric": {
"group": "<label_value>"
},
"values": [
[
<unix_timestamp>,
"<sample_value>"
{
"metric": {
"<label-name>": "<label-value>"
},
"values": [
[
<unix_timestamp>, // 1.634624989695E9
"<sample_value>" // integer
]
]
]
}
]
],
"metadata": {
"threshold": 2000000,
"warmup": 60
}
}
```
* The `<label_value>` provided in "metric.group" must be equal to the id of the Kafka consumer group.
### description
* results:
* metric-metadata:
* Labels of this metric. The `dropped-records` slo checker does not use labels in the calculation of the service level objective.
* results
* The `<unix_timestamp>` provided as the first element of each element in the "values" array must be the timestamp of the measurement value in seconds (with optional decimal precision)
* The `<sample_value>` must be the measurement value as string.
* **threshold**: Must be an unsigned integer that specifies the threshold for the SLO evaluation. The SLO is considered fulfilled, if the result value is below the threshold. If the result value is equal or above the threshold, the SLO is considered not fulfilled.
* **warmup**: Specifieds the warmup time in seconds that are ignored for evaluating the SLO.
\ No newline at end of file
* metadata: Metadata required for the calculation of the service level objective.
* **threshold**: Must be an unsigned integer that specifies the threshold for the SLO evaluation. The SLO is considered fulfilled, if the result value is below the threshold. If the result value is equal or above the threshold, the SLO is considered not fulfilled.
* **warmup**: Specifies the warmup time in seconds that is ignored when evaluating the SLO.
......@@ -38,7 +38,7 @@ def calculate_slope_trend(results, warmup):
err_msg = 'Computing trend slope failed.'
logger.exception(err_msg)
logger.error('Mark this subexperiment as not successful and continue benchmark.')
return False
return float('inf')
logger.info("Computed lag trend slope is '%s'", trend_slope)
return trend_slope
......@@ -49,7 +49,7 @@ def check_service_level_objective(results, threshold):
@app.post("/evaluate-slope",response_model=bool)
async def evaluate_slope(request: Request):
data = json.loads(await request.body())
results = [calculate_slope_trend(total_lag, data['warmup']) for total_lag in data['total_lags']]
return check_service_level_objective(results=results, threshold=data["threshold"])
results = [calculate_slope_trend(total_lag, data['metadata']['warmup']) for total_lag in data['results']]
return check_service_level_objective(results=results, threshold=data['metadata']["threshold"])
logger.info("SLO evaluator is online")
\ No newline at end of file
......@@ -17,7 +17,7 @@ class TestSloEvaluation(unittest.TestCase):
data = json.load(json_file)
response = self.client.post("/evaluate-slope", json=data)
self.assertEquals(response.json(), True)
def test_check_service_level_objective(self):
list = [1,2,3,4]
self.assertEquals(check_service_level_objective(list, 2), False)
......
fastapi==0.65.2
scikit-learn==0.20.3
pandas==1.0.3
uvicorn
requests
{
"total_lags": [
"results": [
[
{
"metric": {
......@@ -134,6 +134,8 @@
}
]
],
"threshold": 2000,
"warmup": 0
"metadata": {
"threshold": 2000,
"warmup": 0
}
}
\ No newline at end of file
{
"total_lags": [
"results": [
[
{
"metric": {
......@@ -284,6 +284,8 @@
}
]
],
"threshold": 2000,
"warmup": 0
"metadata": {
"threshold": 2000,
"warmup": 0
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment