1. Preparation:
Install the pyspark library (pip install pyspark)
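Note that pip install pyspark only installs Spark's Python API; the MySQL JDBC driver (MySQL Connector/J), which provides the com.mysql.cj.jdbc.Driver class used below, still has to be on Spark's classpath. A minimal sketch of one way to do that is shown here; the Maven coordinate and version are assumptions, so adjust them to your environment or point "spark.jars" at a local jar instead:

from pyspark.sql import SparkSession

# Pull MySQL Connector/J from Maven when the session starts.
# The coordinate/version below is an assumption; use the one matching your setup.
spark = SparkSession.builder \
    .appName("Read MySQL") \
    .config("spark.jars.packages", "mysql:mysql-connector-java:8.0.33") \
    .getOrCreate()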
2. Code:
# Import the module
from pyspark.sql import SparkSession

# Create a SparkSession object
spark = SparkSession.builder \
    .appName("Read MySQL") \
    .getOrCreate()
# Or, equivalently, on a single line
spark = SparkSession.builder.appName("Read MySQL").getOrCreate()

# Set the JDBC connection parameters
url = "jdbc:mysql://localhost/mydatabase"
properties = {
    "user": "username",
    "password": "password",
    "driver": "com.mysql.cj.jdbc.Driver"
}

# Use spark.read.format(...).load(), specifying the "jdbc" format and passing the
# URL, connection properties, and table name, to load the MySQL table into a DataFrame
df = spark.read.format("jdbc").option("url", url).options(**properties) \
    .option("dbtable", "tablename").load()

# Or write each option on its own line
df = spark.read.format("jdbc") \
    .option("url", url) \
    .option("user", "username") \
    .option("password", "password") \
    .option("driver", "com.mysql.cj.jdbc.Driver") \
    .option("dbtable", "tablename") \
    .load()

# Display (print) the DataFrame contents
df.show()
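The same read can also be written with the spark.read.jdbc() shortcut instead of the format("jdbc") chain; this sketch reuses the url and properties defined above and assumes the same table name:

# Equivalent shortcut: pass the URL, table name, and connection properties directly
df = spark.read.jdbc(url=url, table="tablename", properties=properties)
df.show()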