今日对之前学习的 PySpark 内容进行了梳理,同时尝试了通过 Spark SQL 的 JDBC 方式从 MySQL 读取数据和写入数据。
# coding:utf8
"""Load the MovieLens `u.data` ratings file into a Spark DataFrame and
round-trip it through MySQL using Spark SQL's JDBC data source.

NOTE(review): the MySQL JDBC driver jar must be on the Spark classpath,
and connection credentials are hard-coded below — move them to external
configuration before any non-tutorial use.
"""
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StringType, IntegerType

if __name__ == '__main__':
    # Local-mode session using all available cores.
    spark = SparkSession.builder. \
        appName("test"). \
        master("local[*]"). \
        getOrCreate()
    sc = spark.sparkContext

    # Explicit schema: u.data has no header row, so column names and
    # types must be supplied up front.
    schema = StructType(). \
        add("user_id", StringType(), nullable=True). \
        add("movie_id", IntegerType(), nullable=True). \
        add("rank", IntegerType(), nullable=True). \
        add("ts", StringType(), nullable=True)

    df = spark.read.format("csv"). \
        option("sep", ","). \
        option("header", False). \
        option("encoding", "utf-8"). \
        schema(schema=schema). \
        load("../data/input/u.data")

    # One-time write of the ratings into MySQL. Deliberately left
    # disabled so reruns do not overwrite the `movie_data` table;
    # uncomment to (re)populate it.
    # df.write.mode("overwrite"). \
    #     format("jdbc"). \
    #     option("url", "jdbc:mysql://192.168.88.161:3306/ke?useSSL=false&useUnicode=true"). \
    #     option("dbtable", "movie_data"). \
    #     option("user", "root"). \
    #     option("password", "123456"). \
    #     save()

    # Read the table back via JDBC; Spark derives the DataFrame schema
    # from the MySQL table metadata.
    df2 = spark.read.format("jdbc"). \
        option("url", "jdbc:mysql://192.168.88.161:3306/ke?useSSL=false&useUnicode=true"). \
        option("dbtable", "movie_data"). \
        option("user", "root"). \
        option("password", "123456"). \
        load()
    df2.printSchema()
    df2.show()

    # Release the session (and backing JVM) explicitly instead of
    # relying on interpreter shutdown.
    spark.stop()
标签:11,__,option,nullable,add,寒假,spark,True,大三 From: https://www.cnblogs.com/wrf1/p/17977255