Commit 8c172f32 authored by Sören Henning

Use new KafkaConnectorFactory in benchmarks

parent 6e3c43d3
1 merge request: !90 Migrate Flink benchmark implementation
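
The hunks below replace the per-job Kafka setup (Properties, Confluent Avro deserialization schemas, offset and checkpoint handling) with calls to the shared theodolite.commons.flink.KafkaConnectorFactory. The factory's source is not part of this diff; the following sketch only illustrates, from its call sites and the boilerplate removed below, what it presumably encapsulates. Field names and the buildProperties() helper are illustrative, and the overloads of createConsumer and createProducer that take key/value serdes are omitted.

// Hypothetical sketch of theodolite.commons.flink.KafkaConnectorFactory, inferred from the
// call sites and removed code in this commit; the actual class may be structured differently.
import java.util.Properties;

import org.apache.avro.specific.SpecificRecord;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.formats.avro.registry.confluent.ConfluentRegistryAvroDeserializationSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

public class KafkaConnectorFactory {

  private final String groupId; // used as the Kafka consumer group.id (the application id)
  private final String bootstrapServers;
  private final boolean checkpointing; // commit offsets on Flink checkpoints when enabled
  private final String schemaRegistryUrl;

  public KafkaConnectorFactory(final String groupId, final String bootstrapServers,
      final boolean checkpointing, final String schemaRegistryUrl) {
    this.groupId = groupId;
    this.bootstrapServers = bootstrapServers;
    this.checkpointing = checkpointing;
    this.schemaRegistryUrl = schemaRegistryUrl;
  }

  // Consumer for Avro SpecificRecords (e.g. ActivePowerRecord), replacing the per-job
  // ConfluentRegistryAvroDeserializationSchema + Properties + offset handling boilerplate.
  public <T extends SpecificRecord> FlinkKafkaConsumer<T> createConsumer(final String topic,
      final Class<T> recordClass) {
    final DeserializationSchema<T> schema =
        ConfluentRegistryAvroDeserializationSchema.forSpecific(recordClass, this.schemaRegistryUrl);
    final FlinkKafkaConsumer<T> consumer =
        new FlinkKafkaConsumer<>(topic, schema, this.buildProperties());
    consumer.setStartFromGroupOffsets();
    if (this.checkpointing) {
      consumer.setCommitOffsetsOnCheckpoints(true);
    }
    return consumer;
  }

  private Properties buildProperties() {
    final Properties props = new Properties();
    props.setProperty("bootstrap.servers", this.bootstrapServers);
    props.setProperty("group.id", this.groupId);
    return props;
  }
}
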
 package theodolite.uc1.application;
-import java.util.Properties;
 import org.apache.commons.configuration2.Configuration;
-import org.apache.flink.api.common.serialization.DeserializationSchema;
-import org.apache.flink.formats.avro.registry.confluent.ConfluentRegistryAvroDeserializationSchema;
 import org.apache.flink.streaming.api.datastream.DataStream;
 import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
 import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import theodolite.commons.flink.KafkaConnectorFactory;
 import titan.ccp.common.configuration.ServiceConfigurations;
 import titan.ccp.model.records.ActivePowerRecord;
@@ -31,21 +29,11 @@ public class HistoryServiceFlinkJob {
     final String schemaRegistryUrl = this.config.getString(ConfigurationKeys.SCHEMA_REGISTRY_URL);
     final boolean checkpointing = this.config.getBoolean(ConfigurationKeys.CHECKPOINTING, true);
-    final Properties kafkaProps = new Properties();
-    kafkaProps.setProperty("bootstrap.servers", kafkaBroker);
-    kafkaProps.setProperty("group.id", applicationId);
-    final DeserializationSchema<ActivePowerRecord> serde =
-        ConfluentRegistryAvroDeserializationSchema.forSpecific(
-            ActivePowerRecord.class,
-            schemaRegistryUrl);
+    final KafkaConnectorFactory kafkaConnector = new KafkaConnectorFactory(
+        applicationId, kafkaBroker, checkpointing, schemaRegistryUrl);
     final FlinkKafkaConsumer<ActivePowerRecord> kafkaConsumer =
-        new FlinkKafkaConsumer<>(inputTopic, serde, kafkaProps);
-    kafkaConsumer.setStartFromGroupOffsets();
-    if (checkpointing) {
-      kafkaConsumer.setCommitOffsetsOnCheckpoints(true);
-    }
+        kafkaConnector.createConsumer(inputTopic, ActivePowerRecord.class);
     final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
     if (checkpointing) {
[...]
 package theodolite.uc2.application;
 import com.google.common.math.Stats;
-import java.util.Properties;
 import org.apache.commons.configuration2.Configuration;
 import org.apache.flink.api.common.eventtime.WatermarkStrategy;
-import org.apache.flink.api.common.serialization.DeserializationSchema;
 import org.apache.flink.api.common.typeinfo.Types;
 import org.apache.flink.api.java.tuple.Tuple2;
-import org.apache.flink.formats.avro.registry.confluent.ConfluentRegistryAvroDeserializationSchema;
 import org.apache.flink.runtime.state.StateBackend;
 import org.apache.flink.streaming.api.TimeCharacteristic;
 import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
@@ -15,12 +11,11 @@ import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
 import org.apache.flink.streaming.api.windowing.time.Time;
 import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
 import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
-import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
 import org.apache.kafka.common.serialization.Serdes;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import theodolite.commons.flink.KafkaConnectorFactory;
 import theodolite.commons.flink.StateBackends;
-import theodolite.commons.flink.serialization.FlinkKafkaKeyValueSerde;
 import theodolite.commons.flink.serialization.StatsSerializer;
 import titan.ccp.common.configuration.ServiceConfigurations;
 import titan.ccp.model.records.ActivePowerRecord;
@@ -48,32 +43,17 @@ public class HistoryServiceFlinkJob {
     final boolean checkpointing = this.config.getBoolean(ConfigurationKeys.CHECKPOINTING, true);
     final StateBackend stateBackend = StateBackends.fromConfiguration(this.config);
-    final Properties kafkaProps = new Properties();
-    kafkaProps.setProperty("bootstrap.servers", kafkaBroker);
-    kafkaProps.setProperty("group.id", applicationId);
+    final KafkaConnectorFactory kafkaConnector = new KafkaConnectorFactory(
+        applicationId, kafkaBroker, checkpointing, schemaRegistryUrl);
-    final DeserializationSchema<ActivePowerRecord> sourceSerde =
-        ConfluentRegistryAvroDeserializationSchema.forSpecific(
-            ActivePowerRecord.class,
-            schemaRegistryUrl);
+    final FlinkKafkaConsumer<ActivePowerRecord> kafkaSource =
+        kafkaConnector.createConsumer(inputTopic, ActivePowerRecord.class);
-    final FlinkKafkaConsumer<ActivePowerRecord> kafkaSource = new FlinkKafkaConsumer<>(
-        inputTopic, sourceSerde, kafkaProps);
-    kafkaSource.setStartFromGroupOffsets();
-    if (checkpointing) {
-      kafkaSource.setCommitOffsetsOnCheckpoints(true);
-    }
     kafkaSource.assignTimestampsAndWatermarks(WatermarkStrategy.forMonotonousTimestamps());
-    final KafkaSerializationSchema<Tuple2<String, String>> sinkSerde =
-        new FlinkKafkaKeyValueSerde<>(outputTopic,
+    final FlinkKafkaProducer<Tuple2<String, String>> kafkaSink =
+        kafkaConnector.createProducer(outputTopic,
             Serdes::String,
             Serdes::String,
             Types.TUPLE(Types.STRING, Types.STRING));
-    kafkaProps.setProperty("transaction.timeout.ms", "" + 5 * 60 * 1000); // TODO necessary?
-    final FlinkKafkaProducer<Tuple2<String, String>> kafkaSink = new FlinkKafkaProducer<>(
-        outputTopic, sinkSerde, kafkaProps, FlinkKafkaProducer.Semantic.AT_LEAST_ONCE);
-    kafkaSink.setWriteTimestampToKafka(true);
     final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
[...]
@@ -4,14 +4,10 @@ import com.google.common.math.Stats;
 import java.time.Instant;
 import java.time.LocalDateTime;
 import java.time.ZoneId;
-import java.util.Properties;
 import org.apache.commons.configuration2.Configuration;
-import org.apache.flink.api.common.eventtime.WatermarkStrategy;
-import org.apache.flink.api.common.serialization.DeserializationSchema;
 import org.apache.flink.api.common.typeinfo.Types;
 import org.apache.flink.api.java.functions.KeySelector;
 import org.apache.flink.api.java.tuple.Tuple2;
-import org.apache.flink.formats.avro.registry.confluent.ConfluentRegistryAvroDeserializationSchema;
 import org.apache.flink.runtime.state.StateBackend;
 import org.apache.flink.streaming.api.TimeCharacteristic;
 import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
@@ -22,8 +18,8 @@ import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
 import org.apache.kafka.common.serialization.Serdes;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import theodolite.commons.flink.KafkaConnectorFactory;
 import theodolite.commons.flink.StateBackends;
-import theodolite.commons.flink.serialization.FlinkKafkaKeyValueSerde;
 import theodolite.commons.flink.serialization.StatsSerializer;
 import theodolite.uc3.application.util.HourOfDayKey;
 import theodolite.uc3.application.util.HourOfDayKeyFactory;
@@ -32,7 +28,6 @@ import theodolite.uc3.application.util.StatsKeyFactory;
 import titan.ccp.common.configuration.ServiceConfigurations;
 import titan.ccp.model.records.ActivePowerRecord;

 /**
  * The History microservice implemented as a Flink job.
  */
@@ -52,45 +47,26 @@ public class HistoryServiceFlinkJob {
     final String inputTopic = this.config.getString(ConfigurationKeys.KAFKA_INPUT_TOPIC);
     final String outputTopic = this.config.getString(ConfigurationKeys.KAFKA_OUTPUT_TOPIC);
     final String schemaRegistryUrl = this.config.getString(ConfigurationKeys.SCHEMA_REGISTRY_URL);
-    final String timeZoneString = this.config.getString(ConfigurationKeys.TIME_ZONE);
-    final ZoneId timeZone = ZoneId.of(timeZoneString);
+    final ZoneId timeZone = ZoneId.of(this.config.getString(ConfigurationKeys.TIME_ZONE));
     final Time aggregationDuration =
-        Time.days(this.config.getInt(ConfigurationKeys.AGGREGATION_DURATION_DAYS));
+        Time.seconds(this.config.getInt(ConfigurationKeys.AGGREGATION_DURATION_DAYS));
     final Time aggregationAdvance =
-        Time.days(this.config.getInt(ConfigurationKeys.AGGREGATION_ADVANCE_DAYS));
+        Time.seconds(this.config.getInt(ConfigurationKeys.AGGREGATION_ADVANCE_DAYS));
     final StateBackend stateBackend = StateBackends.fromConfiguration(this.config);
     final boolean checkpointing = this.config.getBoolean(ConfigurationKeys.CHECKPOINTING, true);
-    final Properties kafkaProps = new Properties();
-    kafkaProps.setProperty("bootstrap.servers", kafkaBroker);
-    kafkaProps.setProperty("group.id", applicationId);
-    // Sources and Sinks with Serializer and Deserializer
-    final DeserializationSchema<ActivePowerRecord> sourceSerde =
-        ConfluentRegistryAvroDeserializationSchema.forSpecific(
-            ActivePowerRecord.class,
-            schemaRegistryUrl);
-    final FlinkKafkaConsumer<ActivePowerRecord> kafkaSource = new FlinkKafkaConsumer<>(
-        inputTopic, sourceSerde, kafkaProps);
+    final KafkaConnectorFactory kafkaConnector = new KafkaConnectorFactory(
+        applicationId, kafkaBroker, checkpointing, schemaRegistryUrl);
-    kafkaSource.setStartFromGroupOffsets();
-    if (checkpointing) {
-      kafkaSource.setCommitOffsetsOnCheckpoints(true);
-    }
-    kafkaSource.assignTimestampsAndWatermarks(WatermarkStrategy.forMonotonousTimestamps());
-    final FlinkKafkaKeyValueSerde<String, String> sinkSerde =
-        new FlinkKafkaKeyValueSerde<>(outputTopic,
+    // Sources and Sinks
+    final FlinkKafkaConsumer<ActivePowerRecord> kafkaSource =
+        kafkaConnector.createConsumer(inputTopic, ActivePowerRecord.class);
+    final FlinkKafkaProducer<Tuple2<String, String>> kafkaSink =
+        kafkaConnector.createProducer(outputTopic,
            Serdes::String,
            Serdes::String,
            Types.TUPLE(Types.STRING, Types.STRING));
-    final FlinkKafkaProducer<Tuple2<String, String>> kafkaSink = new FlinkKafkaProducer<>(
-        outputTopic, sinkSerde, kafkaProps, FlinkKafkaProducer.Semantic.AT_LEAST_ONCE);
-    kafkaSink.setWriteTimestampToKafka(true);
     // Execution environment configuration
     final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
[...]
 package theodolite.uc4.application;
 import java.time.Duration;
-import java.util.Properties;
 import java.util.Set;
 import org.apache.commons.configuration2.Configuration;
 import org.apache.flink.api.common.eventtime.WatermarkStrategy;
-import org.apache.flink.api.common.serialization.DeserializationSchema;
-import org.apache.flink.api.common.typeinfo.TypeHint;
 import org.apache.flink.api.common.typeinfo.TypeInformation;
 import org.apache.flink.api.common.typeinfo.Types;
 import org.apache.flink.api.java.functions.KeySelector;
 import org.apache.flink.api.java.tuple.Tuple2;
-import org.apache.flink.formats.avro.registry.confluent.ConfluentRegistryAvroDeserializationSchema;
 import org.apache.flink.runtime.state.StateBackend;
 import org.apache.flink.streaming.api.TimeCharacteristic;
 import org.apache.flink.streaming.api.datastream.DataStream;
@@ -23,8 +19,9 @@ import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
 import org.apache.kafka.common.serialization.Serdes;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import theodolite.commons.flink.KafkaConnectorFactory;
 import theodolite.commons.flink.StateBackends;
-import theodolite.commons.flink.serialization.FlinkKafkaKeyValueSerde;
+import theodolite.commons.flink.TupleType;
 import theodolite.uc4.application.util.ImmutableSensorRegistrySerializer;
 import theodolite.uc4.application.util.ImmutableSetSerializer;
 import theodolite.uc4.application.util.SensorParentKey;
@@ -67,72 +64,33 @@ public class AggregationServiceFlinkJob {
     final boolean debug = this.config.getBoolean(ConfigurationKeys.DEBUG, true);
     final boolean checkpointing = this.config.getBoolean(ConfigurationKeys.CHECKPOINTING, true);
-    final Properties kafkaProps = new Properties();
-    kafkaProps.setProperty("bootstrap.servers", kafkaBroker);
-    kafkaProps.setProperty("group.id", applicationId);
-    // Sources and Sinks with Serializer and Deserializer
+    final KafkaConnectorFactory kafkaConnector = new KafkaConnectorFactory(
+        applicationId, kafkaBroker, checkpointing, schemaRegistryUrl);
     // Source from input topic with ActivePowerRecords
-    final DeserializationSchema<ActivePowerRecord> inputSerde =
-        ConfluentRegistryAvroDeserializationSchema.forSpecific(
-            ActivePowerRecord.class,
-            schemaRegistryUrl);
-    final FlinkKafkaConsumer<ActivePowerRecord> kafkaInputSource = new FlinkKafkaConsumer<>(
-        inputTopic, inputSerde, kafkaProps);
-    kafkaInputSource.setStartFromGroupOffsets();
-    if (checkpointing) {
-      kafkaInputSource.setCommitOffsetsOnCheckpoints(true);
-    }
+    final FlinkKafkaConsumer<ActivePowerRecord> kafkaInputSource =
+        kafkaConnector.createConsumer(inputTopic, ActivePowerRecord.class);
     // TODO Watermarks?
     // Source from output topic with AggregatedPowerRecords
-    final DeserializationSchema<AggregatedActivePowerRecord> outputSerde =
-        ConfluentRegistryAvroDeserializationSchema.forSpecific(
-            AggregatedActivePowerRecord.class,
-            schemaRegistryUrl);
     final FlinkKafkaConsumer<AggregatedActivePowerRecord> kafkaOutputSource =
-        new FlinkKafkaConsumer<>(
-            outputTopic, outputSerde, kafkaProps);
-    kafkaOutputSource.setStartFromGroupOffsets();
-    if (checkpointing) {
-      kafkaOutputSource.setCommitOffsetsOnCheckpoints(true);
-    }
+        kafkaConnector.createConsumer(outputTopic, AggregatedActivePowerRecord.class);
     // Source from configuration topic with EventSensorRegistry JSON
-    final FlinkKafkaKeyValueSerde<Event, String> configSerde =
-        new FlinkKafkaKeyValueSerde<>(
+    final FlinkKafkaConsumer<Tuple2<Event, String>> kafkaConfigSource =
+        kafkaConnector.createConsumer(
            configurationTopic,
            EventSerde::serde,
            Serdes::String,
-            TypeInformation.of(new TypeHint<Tuple2<Event, String>>() {}));
-    final FlinkKafkaConsumer<Tuple2<Event, String>> kafkaConfigSource = new FlinkKafkaConsumer<>(
-        configurationTopic, configSerde, kafkaProps);
-    kafkaConfigSource.setStartFromGroupOffsets();
-    if (checkpointing) {
-      kafkaConfigSource.setCommitOffsetsOnCheckpoints(true);
-    }
+            TupleType.of(TypeInformation.of(Event.class), Types.STRING));
     // Sink to output topic with SensorId, AggregatedActivePowerRecord
-    final FlinkKafkaKeyValueSerde<String, AggregatedActivePowerRecord> aggregationSerde =
-        new FlinkKafkaKeyValueSerde<>(
+    final FlinkKafkaProducer<Tuple2<String, AggregatedActivePowerRecord>> kafkaAggregationSink =
+        kafkaConnector.createProducer(
            outputTopic,
            Serdes::String,
            () -> new SchemaRegistryAvroSerdeFactory(schemaRegistryUrl).forValues(),
            Types.TUPLE(Types.STRING, TypeInformation.of(AggregatedActivePowerRecord.class)));
-    final FlinkKafkaProducer<Tuple2<String, AggregatedActivePowerRecord>> kafkaAggregationSink =
-        new FlinkKafkaProducer<>(
-            outputTopic,
-            aggregationSerde,
-            kafkaProps,
-            FlinkKafkaProducer.Semantic.AT_LEAST_ONCE);
-    kafkaAggregationSink.setWriteTimestampToKafka(true);
     // Execution environment configuration
     // org.apache.flink.configuration.Configuration conf = new
     // org.apache.flink.configuration.Configuration();
[...]
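
The uc4 hunk above also swaps the anonymous TypeHint expression for a new TupleType helper when declaring the type information of the configuration stream. Below is a minimal sketch of such a helper, inferred only from the expression it replaces (TypeInformation.of(new TypeHint<Tuple2<Event, String>>() {})); the actual theodolite.commons.flink.TupleType may be implemented differently.

// Hypothetical sketch of the TupleType helper; names and structure are illustrative.
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;

public final class TupleType {

  private TupleType() {}

  // Builds TypeInformation for a Tuple2 from the type information of its two fields,
  // avoiding an anonymous TypeHint subclass at every call site.
  public static <K, V> TypeInformation<Tuple2<K, V>> of(
      final TypeInformation<K> keyType, final TypeInformation<V> valueType) {
    return Types.TUPLE(keyType, valueType);
  }
}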