- HighWordCountAccumulator.scala
package accumulator
import org.apache.spark.util.AccumulatorV2
import scala.collection.mutable
/**
 * Word-count accumulator built on Spark's `AccumulatorV2`.
 *
 * The first type parameter (`Array[String]`) is the type of the data handed to `add`;
 * the second (`mutable.Map[String, Long]`) is the type returned by `value`.
 */
class HighWordCountAccumulator extends AccumulatorV2[Array[String], collection.mutable.Map[String, Long]] {

  // Running total of how many times each word has been seen.
  // Left public and reassignable so code that accesses the field directly keeps working.
  var wordCountMap = collection.mutable.Map[String, Long]()

  /** @return true when no word has been counted yet. */
  override def isZero: Boolean = wordCountMap.isEmpty

  /**
   * Creates a new accumulator holding the same counts as this one.
   *
   * The entries are copied into a fresh map: sharing the map reference would make
   * later updates to either accumulator show up in the other.
   */
  override def copy(): AccumulatorV2[Array[String], mutable.Map[String, Long]] = {
    val duplicate = new HighWordCountAccumulator()
    val snapshot = mutable.Map.empty[String, Long]
    snapshot ++= wordCountMap
    duplicate.wordCountMap = snapshot
    duplicate
  }

  /** Discards all counts by switching to a new empty map (a previously returned map is left untouched). */
  override def reset(): Unit = {
    wordCountMap = collection.mutable.Map[String, Long]()
  }

  /**
   * Counts one occurrence for every element of `v`.
   *
   * @param v words to count; repeated words are counted once per occurrence
   */
  override def add(v: Array[String]): Unit =
    v.foreach { word =>
      wordCountMap.update(word, wordCountMap.getOrElse(word, 0L) + 1L)
    }

  /**
   * Adds every count held by `other` to the counts held by this accumulator.
   *
   * @param other accumulator whose `value` is folded into this one
   */
  override def merge(other: AccumulatorV2[Array[String], mutable.Map[String, Long]]): Unit =
    for ((word, count) <- other.value) {
      wordCountMap.update(word, wordCountMap.getOrElse(word, 0L) + count)
    }

  /** @return the live backing map of word -> count (not a copy). */
  override def value: mutable.Map[String, Long] = wordCountMap
}
- HighAccCode.scala
package accumulator
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Driver program: counts the words of a text file with a `HighWordCountAccumulator`
 * and prints the resulting word -> count map.
 */
object HighAccCode {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("accumulator")
    val sc = new SparkContext(sparkConf)
    try {
      val rdd: RDD[String] = sc.textFile("hdfs://node1:9000/wc.txt")
      val hwca = new HighWordCountAccumulator()
      sc.register(hwca)
      // Update the accumulator inside an action (foreach) instead of a transformation:
      // Spark may re-apply accumulator updates made in transformations when a task or
      // stage is re-executed. This also avoids collecting every word to the driver
      // just to force evaluation.
      // Note: split(" ") yields empty tokens for blank lines and consecutive spaces;
      // the tokenization is kept as-is so the counts are unchanged.
      rdd.foreach((line: String) => hwca.add(line.split(" ")))
      println(hwca.value)
    } finally {
      // Release the context even when the job fails.
      sc.stop()
    }
  }
}
标签:String,val,--,累加器,wordcount,wordCountMap,mutable,def
From: https://www.cnblogs.com/jsqup/p/16621404.html