Data processing: converting a list of nested dict fields into a PySpark DataFrame
# coding=utf-8
from pyspark.sql import SparkSession
from pyspark.sql.types import *
import pandas as pd
from pyspark.sql import Row


class SparkContext:
    """Context-manager wrapper: builds a Hive-enabled SparkSession and stops it on exit."""

    def __init__(self, name="cleaner"):
        self.spark = (
            SparkSession.builder.appName(name)
            .config("hive.exec.dynamic.partition", True)
            .config("hive.exec.dynamic.partition.mode", "nonstrict")
            .enableHiveSupport()
            .getOrCreate()
        )
        self.spark.sparkContext.setLogLevel("ERROR")

    def __enter__(self):
        return self.spark

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.spark.stop()


def main():
    # Each dict becomes one row; Row(**row) turns the dict keys into column names.
    data = [
        {'ent_name': '百度', 'credit_code': '1234567890'},
        {'ent_name': 'abc', 'credit_code': '121212222'},
    ]
    row_data = [Row(**row) for row in data]
    with SparkContext('test_df') as spark:
        df = spark.createDataFrame(row_data)
        df.show()


if __name__ == '__main__':
    main()
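The dicts in the example above are flat. When each record actually carries a nested list field (which is what the title refers to), letting Spark infer the schema from dicts can be fragile, so it is usually safer to declare the schema explicitly with ArrayType/StructType. The sketch below shows that pattern; the shareholders field and its sub-fields are made up for illustration and are not part of the original example.

# coding=utf-8
# Sketch: list-of-dict records with a nested list field, loaded with an explicit schema.
# The 'shareholders' field and its 'name'/'ratio' sub-fields are hypothetical.
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, ArrayType

nested_data = [
    {'ent_name': '百度', 'credit_code': '1234567890',
     'shareholders': [{'name': 'a', 'ratio': '0.6'}, {'name': 'b', 'ratio': '0.4'}]},
    {'ent_name': 'abc', 'credit_code': '121212222',
     'shareholders': []},
]

schema = StructType([
    StructField('ent_name', StringType(), True),
    StructField('credit_code', StringType(), True),
    StructField('shareholders', ArrayType(StructType([
        StructField('name', StringType(), True),
        StructField('ratio', StringType(), True),
    ])), True),
])

spark = SparkSession.builder.appName('nested_demo').getOrCreate()
# With an explicit schema, nested dicts are mapped to struct columns by key.
df = spark.createDataFrame(nested_data, schema=schema)
df.printSchema()
df.show(truncate=False)
spark.stop()

If the nested array later needs to be flattened back into one row per element, pyspark.sql.functions.explode on the shareholders column does that.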