// Imports below assume the Flink Scala / Table-API bridge dependencies this job is built against (JDBC connector era, Flink 1.11+).
// DateUtil, MyClickHouseUtil, BrowseLogWideInfo and ProductVisitInfo are project classes assumed to be provided elsewhere in the project.
import java.sql.PreparedStatement

import org.apache.flink.connector.jdbc.JdbcStatementBuilder
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.types.Row
import org.apache.flink.util.Collector

object ProcessBrowseLogInfoToDM {
def main(args: Array[String]): Unit = {
//1. Set up the streaming and table environments
val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
val tblEnv: StreamTableEnvironment = StreamTableEnvironment.create(env)
env.enableCheckpointing(5000)
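// Checkpoint every 5s so the Kafka source offsets (and any operator state) can be restored after a failure.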
import org.apache.flink.streaming.api.scala._
/**
* 2. Create a Kafka connector table to consume the wide browse-log data from the Kafka DWS topic
*/
tblEnv.executeSql(
"""
|create table kafka_dws_user_login_wide_tbl (
| user_id string,
| product_name string,
| first_category_name string,
| second_category_name string,
| obtain_points string
|) with (
| 'connector' = 'kafka',
| 'topic' = 'KAFKA-DWS-BROWSE-LOG-WIDE-TOPIC',
| 'properties.bootstrap.servers'='node1:9092,node2:9092,node3:9092',
|  'scan.startup.mode'='earliest-offset', -- can also be set to latest-offset
| 'properties.group.id' = 'my-group-id',
| 'format' = 'json'
|)
""".stripMargin)
/**
* 3. Over 10-second windows, count in real time how many times each product and its first/second-level
* categories are browsed, and write the results to ClickHouse
*/
val dwsTbl:Table = tblEnv.sqlQuery(
"""
| select user_id,product_name,first_category_name,second_category_name from kafka_dws_user_login_wide_tbl
""".stripMargin)
//4. Convert the Row-typed records into BrowseLogWideInfo objects
val browseDS: DataStream[BrowseLogWideInfo] = tblEnv.toAppendStream[Row](dwsTbl)
.map(row => {
val user_id: String = row.getField(0).toString
val product_name: String = row.getField(1).toString
val first_category_name: String = row.getField(2).toString
val second_category_name: String = row.getField(3).toString
BrowseLogWideInfo(null, user_id, null, product_name, null, null, first_category_name, second_category_name, null)
})
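// The key below is "firstCategory-secondCategory-productName"; the window function splits it back on "-",
// which assumes the names themselves contain no "-" characters. The 10s tumbling window runs with the
// environment's default time characteristic (processing time on older Flink versions), since no event-time
// timestamps or watermarks are assigned here.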
val dwsDS: DataStream[ProductVisitInfo] = browseDS.keyBy(info => {
info.first_category_name + "-" + info.second_category_name + "-" + info.product_name
})
.timeWindow(Time.seconds(10))
.process(new ProcessWindowFunction[BrowseLogWideInfo, ProductVisitInfo, String, TimeWindow] {
override def process(key: String, context: Context, elements: Iterable[BrowseLogWideInfo], out: Collector[ProductVisitInfo]): Unit = {
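// DateUtil is a project helper; it is assumed to format the window's epoch-millis boundaries (passed here as
// strings) into "yyyyMMdd" / "yyyyMMdd HH:mm:ss"-style date strings for the ClickHouse columns.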
val currentDt: String = DateUtil.getDateYYYYMMDD(context.window.getStart.toString)
val startTime: String = DateUtil.getDateYYYYMMDDHHMMSS(context.window.getStart.toString)
val endTime: String = DateUtil.getDateYYYYMMDDHHMMSS(context.window.getEnd.toString)
val arr: Array[String] = key.split("-")
val firstCatName: String = arr(0)
val secondCatName: String = arr(1)
val productName: String = arr(2)
val cnt: Int = elements.toList.size
out.collect(ProductVisitInfo(currentDt, startTime, endTime, firstCatName, secondCatName, productName, cnt))
}
})
/**
* 5. Write the above results into the ClickHouse table dm_product_visit_info:
* create table dm_product_visit_info(
* current_dt String,
* window_start String,
* window_end String,
* first_cat String,
* second_cat String,
* product String,
* product_cnt UInt32
* ) engine = MergeTree() order by current_dt
*
*/
//Prepare the INSERT statement for ClickHouse
val insertIntoCkSql = "insert into dm_product_visit_info (current_dt,window_start,window_end,first_cat,second_cat,product,product_cnt) values (?,?,?,?,?,?,?)"
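// MyClickHouseUtil.clickhouseSink is a project helper (presumably wrapping Flink's JDBC sink); the
// JdbcStatementBuilder below binds one ProductVisitInfo per row, in the same order as the column list above.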
val ckSink: SinkFunction[ProductVisitInfo] = MyClickHouseUtil.clickhouseSink[ProductVisitInfo](insertIntoCkSql,new JdbcStatementBuilder[ProductVisitInfo] {
override def accept(pst: PreparedStatement, productVisitInfo: ProductVisitInfo): Unit = {
pst.setString(1,productVisitInfo.currentDt)
pst.setString(2,productVisitInfo.windowStart)
pst.setString(3,productVisitInfo.windowEnd)
pst.setString(4,productVisitInfo.firstCat)
pst.setString(5,productVisitInfo.secondCat)
pst.setString(6,productVisitInfo.product)
pst.setLong(7,productVisitInfo.productCnt)
}
})
//Attach the ClickHouse sink to the result stream
dwsDS.addSink(ckSink)
env.execute()
}
}