package com.ustcinfo.SDK
import java.sql.{Connection, PreparedStatement}
import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapred.TextInputFormat
import org.apache.spark.sql._
object SDK_number {
val spark: SparkSession = SparkSession
.builder()
.appName("SDK数据入库")
.master("local[2]")
.config("spark.debug.maxToStringFields", 1000)
.getOrCreate()
spark.sparkContext.setLogLevel("WARN")
import spark.implicits._ //导入隐式转换
import org.apache.spark.sql.functions._ //导入function函数
def main(args: Array[String]): Unit = {
val str = args(0)
val gbkPath = s"/bdtj/test/app$str.txt"
val res_mysql: DataFrame = spark.sparkContext.hadoopFile(gbkPath, classOf[TextInputFormat], classOf[LongWritable], classOf[Text], 1)
.map(p => new String(p._2.getBytes, 0, p._2.getLength, "UTF-8"))
//.filter(_.split("\\|\\+\\|", -1).length == 4)
.map(lines => {
val line: Array[String] = lines.split("\\|\\+\\|", -1)
(line(0).drop(1), line(1), line(2), line(3))
}
).toDF("ID", "FIVE_ID", "THREE_ID", "TESTTIME")
res_mysql.show(100, false)
//数据以DF格式进行foreachPartition对每一条数据进行遍历
res_mysql.select("ID", "FIVE_ID", "THREE_ID").foreachPartition(f => {
val connection: Connection = createConnection()//调用createConnection方法,实现对数据库连接
connection.setAutoCommit(false)
//设置connection.setautocommit(false);只有程序调用connection.commit()的时候才会将先前执行的语句一起提交到数据库,这样就实现了数据库的事务。
//true:sql命令的提交(commit)由驱动程序负责
//false:sql命令的提交由应用程序负责,程序必须调用commit或者rollback方法
val sql: String = "insert into sdk_grid_num(ID,FIVE_ID,THREE_ID)" +
"value(?,?,?) on duplicate key update FIVE_ID=?,THREE_ID=?"
println("更新数据库中的数据")
val statement: PreparedStatement = connection.prepareStatement(sql)
f.foreach(row => {
//这里注意更新插入字段顺序要一直
statement.setString(1, row.getAs[String]("ID"))
statement.setString(2, row.getAs[String]("FIVE_ID"))
statement.setString(3, row.getAs[String]("THREE_ID"))
statement.setString(5, row.getAs[String]("FIVE_ID"))
statement.setString(6, row.getAs[String]("THREE_ID"))
statement.addBatch()
})
statement.executeBatch()
connection.commit()
statement.close()
connection.close()
})
res_mysql.unpersist()
}
def createConnection(): Connection = {
Class.forName("com.mysql.cj.jdbc.Driver").newInstaabcsnce()
java.sql.DriverManager.getConnection("jdbc:mysql://132.123.13.113:3306/ads?useUnicode=true&characterEncoding=utf8", "abc", "123456")
}
}
标签:String,val,sql,ID,statement,MySQL,spark,主键 From: https://www.cnblogs.com/xiguabigdata/p/16850035.html