大数据实战训练营 - Spark Core 作业
1. 作业一：使用 RDD API 实现带词频的倒排索引
复制代码
asadasdasdasdasdasdasdasdasasdasdasdasdasdasdas
asadasdasdasdasdasdasdasdasasdasdasdasdasdasdas
asadasdasdasdasdasdasdasdasasdasdasdasdasdasdas
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
/**
 * Builds an inverted index with term frequencies using the Spark RDD API.
 *
 * Each input line is expected to look like `<bookName>.<anything>"<text>"`:
 * everything before the first `.` is taken as the book name, and the text
 * between the first pair of double quotes is split on single spaces into words.
 * For every word the job prints one tuple `(word, [(bookName, count), ...])`,
 * with the output ordered by word.
 */
object InvertedIndex {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultInputPath = "D:\\data\\index.txt"

  /**
   * Runs the job and prints the index to standard output.
   *
   * @param args optional; `args(0)` overrides the input path, otherwise
   *             [[DefaultInputPath]] is read
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)
    val sparkConf = new SparkConf().setAppName("sparkbyexamples.com").setMaster("local[1]")
    val sc = new SparkContext(sparkConf)
    try {
      sc.textFile(inputPath)
        .flatMap(line => parseLine(line))
        // Key by (word, book) so reduceByKey yields the per-book frequency of each word.
        .map { case (bookName, word) => ((word, bookName), 1L) }
        .reduceByKey(_ + _)
        // Re-key by word alone so all (book, count) pairs of a word end up in one group.
        .map { case ((word, bookName), count) => (word, (bookName, count)) }
        .groupByKey()
        .sortByKey()
        .collect()
        .foreach(println)
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }

  /**
   * Splits one input line into `(bookName, word)` pairs.
   *
   * Lines without a `.` separator or without a quoted section produce no pairs
   * instead of failing the whole job; empty tokens caused by repeated spaces
   * are dropped so they are not indexed as words.
   *
   * @param line a raw line of the input file
   * @return the `(bookName, word)` pairs found on the line, possibly empty
   */
  private def parseLine(line: String): Seq[(String, String)] =
    line.split("\\.", 2) match {
      case Array(bookName, rest) =>
        val quoted = rest.split("\"")
        if (quoted.length > 1)
          quoted(1).split(" ").filter(_.nonEmpty).map(word => (bookName, word)).toSeq
        else
          Seq.empty
      case _ =>
        Seq.empty
    }
}
asadasdasdasdasdasdasdasdasasdasdasdasdasdasdas
asadasdasdasdasdasdasdasdasasdasdasdasdasdasdas
asadasdasdasdasdasdasdasdasasdasdasdasdasdasdas
还未添加个人签名 2018.04.15 加入
还未添加个人简介
促进软件开发及相关领域知识与创新的传播
评论