CoProcessFunction 实战三部曲之三：定时器和侧输出

2022 年 5 月 05 日
本文字数：3953 字
阅读完需：约 13 分钟

public class ExecuteWithTimeoutCoProcessFunction extends CoProcessFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>> {

private static final Logger logger = LoggerFactory.getLogger(ExecuteWithTimeoutCoProcessFunction.class);

/**

等待时间

private static final long WAIT_TIME = 10000L;

public ExecuteWithTimeoutCoProcessFunction(OutputTag<String> source1SideOutput, OutputTag<String> source2SideOutput) {

super();

this.source1SideOutput = source1SideOutput;

this.source2SideOutput = source2SideOutput;

}

private OutputTag<String> source1SideOutput;

private OutputTag<String> source2SideOutput;

// 某个 key 在 processElement1 中存入的状态

private ValueState<Integer> state1;

// 某个 key 在 processElement2 中存入的状态

private ValueState<Integer> state2;

// 如果创建了定时器，就在状态中保存定时器的 key

private ValueState<Long> timerState;

// onTimer 中拿不到当前 key，只能提前保存在状态中（KeyedProcessFunction 的 OnTimerContext 有 API 可以取到，但是 CoProcessFunction 的 OnTimerContext 却没有）

private ValueState<String> currentKeyState;

@Override

public void open(Configuration parameters) throws Exception {

// 初始化状态

state1 = getRuntimeContext().getState(new ValueStateDescriptor<>("myState1", Integer.class));

state2 = getRuntimeContext().getState(new ValueStateDescriptor<>("myState2", Integer.class));

timerState = getRuntimeContext().getState(new ValueStateDescriptor<>("timerState", Long.class));

currentKeyState = getRuntimeContext().getState(new ValueStateDescriptor<>("currentKeyState", String.class));

}

/**

所有状态都清理掉

private void clearAllState() {

state1.clear();

state2.clear();

currentKeyState.clear();

timerState.clear();

}

@Override

public void processElement1(Tuple2<String, Integer> value, Context ctx, Collector<Tuple2<String, Integer>> out) throws Exception {

logger.info("processElement1：处理元素 1：{}", value);

String key = value.f0;

Integer value2 = state2.value();

// value2 为空，就表示 processElement2 还没有处理或这个 key，

// 这时候就把 value1 保存起来

if(null==value2) {

logger.info("processElement1：2 号流还未收到过[{}]，把 1 号流收到的值[{}]保存起来", key, value.f1);

state1.update(value.f1);

currentKeyState.update(key);

// 开始 10 秒的定时器，10 秒后会进入

long timerKey = ctx.timestamp() + WAIT_TIME;

ctx.timerService().registerProcessingTimeTimer(timerKey);

// 保存定时器的 key

timerState.update(timerKey);

logger.info("processElement1：创建定时器[{}]，等待 2 号流接收数据", Utils.time(timerKey));

} else {

logger.info("processElement1：2 号流收到过[{}]，值是[{}]，现在把两个值相加后输出", key, value2);

// 输出一个新的元素到下游节点

out.collect(new Tuple2<>(key, value.f1 + value2));

// 删除定时器（这个定时器应该是 processElement2 创建的）

long timerKey = timerState.value();

logger.info("processElement1：[{}]的新元素已输出到下游，删除定时器[{}]", key, Utils.time(timerKey));

ctx.timerService().deleteProcessingTimeTimer(timerKey);

clearAllState();

}

@Override

public void processElement2(Tuple2<String, Integer> value, Context ctx, Collector<Tuple2<String, Integer>> out) throws Exception {

logger.info("processElement2：处理元素 2：{}", value);

String key = value.f0;

Integer value1 = state1.value();

// value1 为空，就表示 processElement1 还没有处理或这个 key，

// 这时候就把 value2 保存起来

if(null==value1) {

logger.info("processElement2：1 号流还未收到过[{}]，把 2 号流收到的值[{}]保存起来", key, value.f1);

state2.update(value.f1);

currentKeyState.update(key);

// 开始 10 秒的定时器，10 秒后会进入

long timerKey = ctx.timestamp() + WAIT_TIME;

ctx.timerService().registerProcessingTimeTimer(timerKey);

// 保存定时器的 key

timerState.update(timerKey);

logger.info("processElement2：创建定时器[{}]，等待 1 号流接收数据", Utils.time(timerKey));

} else {

logger.info("processElement2：1 号流收到过[{}]，值是[{}]，现在把两个值相加后输出", key, value1);

// 输出一个新的元素到下游节点

out.collect(new Tuple2<>(key, value.f1 + value1));

// 删除定时器（这个定时器应该是 processElement1 创建的）

long timerKey = timerState.value();

logger.info("processElement2：[{}]的新元素已输出到下游，删除定时器[{}]", key, Utils.time(timerKey));

ctx.timerService().deleteProcessingTimeTimer(timerKey);

clearAllState();

}

@Override

public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple2<String, Integer>> out) throws Exception {

super.onTimer(timestamp, ctx, out);

String key = currentKeyState.value();

// 定时器被触发，意味着此 key 只在一个中出现过

logger.info("[{}]的定时器[{}]被触发了", key, Utils.time(timestamp));

Integer value1 = state1.value();

Integer value2 = state2.value();

if(null!=value1) {

logger.info("只有 1 号流收到过[{}]，值为[{}]", key, value1);

// 侧输出

ctx.output(source1SideOutput, "source1 side, key [" + key+ "], value [" + value1 + "]");

}

if(null!=value2) {

logger.info("只有 2 号流收到过[{}]，值为[{}]", key, value2);

// 侧输出

ctx.output(source2SideOutput, "source2 side, key [" + key+ "], value [" + value2 + "]");

}

clearAllState();

}

关键点之一：新增状态 timerState，用于保存定时器的 key；
关键点之二：CoProcessFunction 的 onTimer 中拿不到当前 key(KeyedProcessFunction 可以，其 OnTimerContext 类提供了 API)，因此新增状态 currentKeyState，这样在 onTimer 中就知道当前 key 了；
关键点之三：processElement1 中，处理 aaa 时，如果 2 号流还没收到过 aaa，就存入状态，并启动 10 秒定时器；
关键点之四：processElement2 处理 aaa 时，发现 1 号流收到过 aaa，就相加再输出到下游，并且删除 processElement1 中创建的定时器，aaa 相关的所有状态也全部清理掉；
关键点之五：如果 10 秒内 aaa 在两个流中都出现过，那么一定会流入下游并且定时器会被删除，因此，一旦 onTimer 被执行，意味着 aaa 只在一个流中出现过，而且已经过去 10 秒了，此时在 onTimer 中可以执行流向侧输出的操作；
以上就是双流处理的逻辑和代码，接下来编写 AbstractCoProcessFunctionExecutor 的子类；

### 业务执行类 AddTwoSourceValueWithTimeout

负责执行整个功能的，是抽象类 AbstractCoProcessFunctionExecutor 的子类，如下，稍后会说明几个关键点：

package com.bolingcavalry.coprocessfunction;

import com.bolingcavalry.Utils;

import org.apache.flink.api.java.tuple.Tuple;

import org.apache.flink.api.java.tuple.Tuple2;

import org.apache.flink.streaming.api.datastream.KeyedStream;

import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;

import org.apache.flink.streaming.api.functions.co.CoProcessFunction;

import org.apache.flink.streaming.api.watermark.Watermark;

import org.apache.flink.util.OutputTag;

import org.slf4j.Logger;

import org.slf4j.LoggerFactory;

/**

@author will
@email zq2599@gmail.com
@date 2020-11-11 09:48
@description 将两个流中相同 key 的 value 相加，当 key 在一个流中出现后，

public class AddTwoSourceValueWithTimeout extends AbstractCoProcessFunctionExecutor {

private static final Logger logger = LoggerFactory.getLogger(AddTwoSourceValueWithTimeout.class);

// 假设 aaa 流入 1 号源后，在 2 号源超过 10 秒没有收到 aaa，那么 1 号源的 aaa 就会流入 source1SideOutput

final OutputTag<String> source1SideOutput = new OutputTag<String>("source1-sideoutput"){};

// 假设 aaa 流入 2 号源后，如果 1 号源超过 10 秒没有收到 aaa，那么 2 号源的 aaa 就会流入 source2SideOutput

final OutputTag<String> source2SideOutput = new OutputTag<String>("source2-sideoutput"){};

/**

重写父类的方法，保持父类逻辑不变，仅增加了时间戳分配器，向元素中加入时间戳
@param port
@return

@Override

protected KeyedStream<Tuple2<String, Integer>, Tuple> buildStreamFromSocket(StreamExecutionEnvironment env, int port) {

return env

// 监听端口

.socketTextStream("localhost", port)

// 得到的字符串"aaa,3"转成 Tuple2 实例，f0="aaa"，f1=3

.map(new WordCountMap())

// 设置时间戳分配器，用当前时间作为时间戳

.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<Tuple2<String, Integer>>() {

@Override

public long extractTimestamp(Tuple2<String, Integer> element, long previousElementTimestamp) {

long timestamp = System.currentTimeMillis();

logger.info("添加时间戳，值：{}，时间戳：{}", element, Utils.time(timestamp));

// 使用当前系统时间作为时间戳

return timestamp;

}

发布于: 刚刚阅读数: 2

爱好编程进阶

关注

还未添加个人签名 2022.04.13 加入

还未添加个人简介

发布

暂无评论

创作场景

CoProcessFunction 实战三部曲之三：定时器和侧输出

业务执行类 AddTwoSourceValueWithTimeout

爱好编程进阶

评论