2、数据清洗:
要求将day_id一列中的数值清洗为真实的日期格式,可用字符串表示。
数据1对应日期2021-09-01,依次类推,15对应日期2021-09-15
1 CREATE TABLE IF NOT EXISTS sales_sample ( 2 day_id STRING, 3 sale_nbr STRING, 4 buy_nbr STRING, 5 cnt INT, 6 round INT 7 ) 8 ROW FORMAT DELIMITED 9 FIELDS TERMINATED BY ',' 10 STORED AS TEXTFILE; 11 12 13 CREATE TABLE IF NOT EXISTS target_sales_sample ( 14 day_id STRING, 15 sale_nbr STRING, 16 buy_nbr STRING, 17 cnt INT, 18 round INT 19 ) 20 ROW FORMAT DELIMITED 21 FIELDS TERMINATED BY ',' 22 STORED AS TEXTFILE; 23 24 -- -- 将数据上传到HDFS 25 -- -- hadoop fs -put employees.csv / 26 27 -- -- 导入数据到Hive表 28 LOAD DATA INPATH '/xiyou/sales_sample_20170310.csv' INTO TABLE target_sales_sample; 29 -- 30 -- -- 验证数据导入 31 SELECT * FROM target_sales_sample ; 32 SELECT * FROM sales_sample ; 33 -- -- 使用Hive的UDF将day_id映射为日期格式 34 35 insert overwrite table sales_sample 36 select 37 date_add('2023-09-00',cast(day_id as int)) as day_id, 38 sale_nbr as sale_nbr, 39 buy_nbr as buy_nbr, 40 cnt as cnt, 41 round as round 42 from target_sales_sample;
标签:总结,--,nbr,每日,sales,id,sample,清洗,day From: https://www.cnblogs.com/fan-wang/p/17723668.html