import org.apache.spark.sql.functions
import org.apache.spark.sql.functions.{col, regexp_replace, split, to_date, udf}
// Convert a string such as "[0.1,0.2]" into an array<double> column:
// Parse a bracketed numeric string like "[0.1,0.2]" into array<double>:
// 1) strip both brackets in a single regexp pass,
// 2) split the remainder on commas (yields array<string>),
// 3) cast the whole column to array<double> (non-numeric elements become null).
frame = frame.withColumn("ArrayDoubleValue", regexp_replace(col("ArrayDoubleValue"), "[\\[\\]]", ""))
  .withColumn("ArrayDoubleValue", split(col("ArrayDoubleValue"), ","))
  // NOTE(review): a null-to-zero UDF was previously applied here; re-enable if
  // downstream code cannot tolerate nulls from unparseable elements.
  .withColumn("ArrayDoubleValue", col("ArrayDoubleValue").cast("array<double>"))
// Convert a string such as "['2021-01-01','2021-01-02','2021-01-03']" into an array<string> column:
// Parse a quoted, bracketed list like "['2021-01-01','2021-01-02']" into array<string>.
// Strip the brackets and single quotes first — without this step each split
// element would retain them (e.g. "['2021-01-01'"). `split` already returns
// array<string>, so no further cast is needed.
frame = frame.withColumn("StringArray", regexp_replace(col("StringArray"), "[\\[\\]']", ""))
  .withColumn("StringArray", split(col("StringArray"), ","))
// Convert a date string such as "2022-10-20" into a Date column:
// Parse the "Date" string column (format yyyy-MM-dd) into DateType.
// withColumn returns a NEW DataFrame — the original code discarded the result,
// so the transformation silently never took effect; reassign to `frame`.
// The .as("Date") alias was redundant: withColumn("Date", ...) already names it.
frame = frame.withColumn("Date", to_date(col("Date"), "yyyy-MM-dd"))
// Source: https://www.cnblogs.com/ivyJ/p/16982648.html