Flink AggregatingState 实例
AggregatingState介绍
- AggregatingState需要和AggregateFunction配合使用
- add()方法添加一个元素,触发AggregateFunction计算
- get()获取State的值,即AggregateFunction.getResult()基于当前累加器计算出的结果;State为空时返回null
需求:计算每个设备10秒内的平均温度
- import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
- import org.apache.flink.api.common.eventtime.WatermarkStrategy;
- import org.apache.flink.api.common.functions.AggregateFunction;
- import org.apache.flink.api.common.state.AggregatingState;
- import org.apache.flink.api.common.state.AggregatingStateDescriptor;
- import org.apache.flink.api.common.typeinfo.TypeHint;
- import org.apache.flink.api.common.typeinfo.TypeInformation;
- import org.apache.flink.api.java.functions.KeySelector;
- import org.apache.flink.api.java.tuple.Tuple2;
- import org.apache.flink.api.java.tuple.Tuple3;
- import org.apache.flink.configuration.Configuration;
- import org.apache.flink.streaming.api.TimeCharacteristic;
- import org.apache.flink.streaming.api.datastream.DataStreamSource;
- import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
- import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
- import org.apache.flink.streaming.api.functions.source.SourceFunction;
- import org.apache.flink.util.Collector;
-
- import java.time.Duration;
- import java.util.Random;
-
- /**
-  * Demo job showing how {@link AggregatingState} works together with an
-  * {@link AggregateFunction}: for every device it emits the average temperature
-  * of each 10-second event-time bucket.
-  */
- public class AggregatingStateTest {
-
-     /** Length of one averaging bucket, in milliseconds of event time. */
-     private static final long WINDOW_MILLIS = 10_000L;
-
-     /**
-      * Builds and runs the pipeline: random source -> watermarks -> keyBy(device)
-      * -> keyed process function backed by an AggregatingState -> print.
-      *
-      * @param args unused
-      * @throws Exception if the job fails
-      */
-     public static void main(String[] args) throws Exception {
-         StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
-         env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
-         env.getConfig().setAutoWatermarkInterval(100L);
-
-         DataStreamSource<Tuple3<String, Integer, Long>> readings = env.addSource(new SourceFunction<Tuple3<String, Integer, Long>>() {
-             // Written by cancel() from a different thread than run(), hence volatile.
-             private volatile boolean running = true;
-
-             @Override
-             public void run(SourceContext<Tuple3<String, Integer, Long>> ctx) throws Exception {
-                 String[] devices = {"水阀1", "水阀2", "水阀3"};
-                 // One generator for the whole loop instead of two new ones per record.
-                 Random random = new Random();
-                 while (running) {
-                     int deviceIndex = random.nextInt(devices.length);
-                     int temperature = random.nextInt(100);
-                     Thread.sleep(1000L);
-                     // (device id, temperature, event time)
-                     ctx.collect(Tuple3.of(devices[deviceIndex], temperature, System.currentTimeMillis()));
-                 }
-             }
-
-             @Override
-             public void cancel() {
-                 running = false;
-             }
-         });
-
-         readings.assignTimestampsAndWatermarks(
-                 WatermarkStrategy.<Tuple3<String, Integer, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(2))
-                     .withTimestampAssigner(new SerializableTimestampAssigner<Tuple3<String, Integer, Long>>() {
-                         @Override
-                         public long extractTimestamp(Tuple3<String, Integer, Long> reading, long recordTimestamp) {
-                             return reading.f2;
-                         }
-                     }))
-             .keyBy(new KeySelector<Tuple3<String, Integer, Long>, String>() {
-                 @Override
-                 public String getKey(Tuple3<String, Integer, Long> reading) throws Exception {
-                     return reading.f0;
-                 }
-             })
-             .process(new KeyedProcessFunction<String, Tuple3<String, Integer, Long>, String>() {
-                 // <Integer, Double> are the input and output types seen through the state handle;
-                 // the accumulator type only appears in the descriptor.
-                 private transient AggregatingState<Integer, Double> aggregatingState;
-
-                 @Override
-                 public void open(Configuration parameters) throws Exception {
-                     super.open(parameters);
-                     // <Integer, Tuple2<Integer, Integer>, Double> = input, accumulator, output.
-                     // The TypeHint describes the accumulator, i.e. what is actually stored in state.
-                     AggregatingStateDescriptor<Integer, Tuple2<Integer, Integer>, Double> descriptor =
-                         new AggregatingStateDescriptor<>(
-                             "aggregatingState",
-                             new MyAggregate(),
-                             TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {}));
-                     aggregatingState = getRuntimeContext().getAggregatingState(descriptor);
-                 }
-
-                 @Override
-                 public void processElement(Tuple3<String, Integer, Long> value, Context ctx, Collector<String> out) throws Exception {
-                     // Align the element to the start of its 10s bucket and fire at the bucket's end.
-                     long eventTime = ctx.timestamp();
-                     long windowStart = eventTime - (eventTime % WINDOW_MILLIS);
-                     ctx.timerService().registerEventTimeTimer(windowStart + WINDOW_MILLIS);
-                     aggregatingState.add(value.f1);
-                 }
-
-                 @Override
-                 public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
-                     Double average = aggregatingState.get();
-                     // The state can be empty if everything was already flushed by an earlier timer.
-                     if (average != null) {
-                         out.collect("[" + ctx.getCurrentKey() + "] " + "十秒内的平均温度为:" + average);
-                     }
-                     // Reset so the next bucket starts from zero; without this the job reports an
-                     // all-time running average instead of a per-10s one.
-                     // NOTE(review): one state is shared by all buckets of a key, so readings of the
-                     // next bucket that arrive before the watermark passes this timer are counted
-                     // here. Use a window operator or per-bucket state if exact boundaries matter.
-                     aggregatingState.clear();
-                 }
-             })
-             .print();
-
-         env.execute("aggregatingState");
-     }
-
-     /**
-      * Averages integer temperatures. The accumulator is (sum of temperatures, number of readings).
-      */
-     private static class MyAggregate implements AggregateFunction<Integer, Tuple2<Integer, Integer>, Double> {
-
-         /** @return an empty accumulator: sum 0, count 0 */
-         @Override
-         public Tuple2<Integer, Integer> createAccumulator() {
-             return Tuple2.of(0, 0);
-         }
-
-         /** @return a new accumulator with {@code value} added to the sum and the count increased by one */
-         @Override
-         public Tuple2<Integer, Integer> add(Integer value, Tuple2<Integer, Integer> accumulator) {
-             return Tuple2.of(accumulator.f0 + value, accumulator.f1 + 1);
-         }
-
-         /** @return sum / count as a floating-point average */
-         @Override
-         public Double getResult(Tuple2<Integer, Integer> accumulator) {
-             // Divide in double precision; int / int would truncate the average.
-             return accumulator.f0.doubleValue() / accumulator.f1;
-         }
-
-         /** @return the element-wise sum of both accumulators */
-         @Override
-         public Tuple2<Integer, Integer> merge(Tuple2<Integer, Integer> a, Tuple2<Integer, Integer> b) {
-             return Tuple2.of(a.f0 + b.f0, a.f1 + b.f1);
-         }
-     }
- }
原文链接:https://blog.csdn.net/qq_35514685/article/details/124351482
标签:flink,实例,api,Flink,new,AggregatingState,org,apache,import
From: https://www.cnblogs.com/sunny3158/p/18056936