Parsing aa.txt with Spark
1. aa.txt
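The original post shows the input file as a screenshot. As a stand-in, assume aa.txt holds a few lines of space-separated words (the contents below are illustrative, not from the original):

hello spark
hello hadoop
hello scala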
2. pom.xml
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.1.1</version>
    <exclusions>
        <exclusion>
            <groupId>io.netty</groupId>
            <artifactId>netty-all</artifactId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>io.netty</groupId>
    <artifactId>netty-all</artifactId>
    <version>4.1.17.Final</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.6.2</version>
</dependency>
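Note: netty-all is excluded from spark-core and pinned explicitly to 4.1.17.Final. This keeps a single Netty version on the classpath; mixing the older netty-all that spark-core 2.1.1 pulls in transitively with the one other dependencies expect is a common source of runtime linkage errors.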
3. Add the Scala SDK (the spark-core_2.11 artifact is built for Scala 2.11, so the project SDK should be a 2.11.x version)
4. WordCount.scala
import org.apache.spark.{SparkConf, SparkContext}

object WordCount {
  def main(args: Array[String]): Unit = {
    // 1. Create the SparkConf, set the app name, and run in local mode
    //    (local[*] uses all available cores on the machine)
    val conf: SparkConf = new SparkConf().setAppName("WordCount").setMaster("local[*]")
    // 2. Create the SparkContext
    val sc = new SparkContext(conf)
    // 3. Build the RDD pipeline: read lines, split into words, count each word
    val lineRDD = sc.textFile(args(0)) // input path comes from the first program argument
    val wordCount: Array[(String, Int)] = lineRDD.flatMap(_.split(" ")) // one record per word
      .map((_, 1))       // pair each word with an initial count of 1
      .reduceByKey(_ + _) // sum the counts per word
      .collect()          // bring the results back to the driver
    wordCount.foreach(println)
    // 4. Shut down the SparkContext
    sc.stop()
  }
}
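With the illustrative aa.txt above, the pipeline transforms the data roughly as follows (a sketch; actual partitioning and output ordering vary):

lineRDD:      "hello spark", "hello hadoop", "hello scala"
flatMap:      "hello", "spark", "hello", "hadoop", "hello", "scala"
map:          ("hello",1), ("spark",1), ("hello",1), ("hadoop",1), ("hello",1), ("scala",1)
reduceByKey:  ("hello",3), ("spark",1), ("hadoop",1), ("scala",1)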
5. Run/debug configuration
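The original post shows this step as a screenshot. Since the program reads the input path from args(0), the IDE run configuration only needs the path to aa.txt as a program argument, for example (the path below is a placeholder, adjust it to your machine):

Main class:        WordCount
Program arguments: D:\data\aa.txt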
6. Output
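For the illustrative input above, the console would print one tuple per distinct word (ordering is not guaranteed, since reduceByKey does not sort):

(hello,3)
(spark,1)
(hadoop,1)
(scala,1)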
Reference: https://cloud.tencent.com/developer/article/1733758
Source: https://www.cnblogs.com/smallfa/p/16814399.html