ERA5数据下载及其预处理

标签：nc ERA5 im dir output path data 预处理下载

1、ERA5数据下载

（1）下载地址：https://cds-beta.climate.copernicus.eu/datasets

（2）进入网站进行注册、登录（建议选择一个翻译插件，可以直观的看看）

（3）下载步骤----常规下载

[ 1 ] 搜索框输入ERA5,点击search

[ 2 ] 从出现的这几个中进行选择，我选择的是月平均数据，也有每小时的数据

[ 3 ] 一共有三个方面：概述、下载及文文档。点击下载：出现以下页面

[ 4 ] 按照你自己的需求选择数据（包括年份、月份、实际数据）我选择的是地表温度、总降雨量、潜在蒸发数据。下载格式选择NetCDF4。--然后提交表单

然后就会跳转到以下页面，点击下载即可。

二、API下载及预处理

（1）API下载：

1、接着上面[4]，选择完数据之后，下面有一个这个（API请求）：点击显示代码--你选择好数据，他就会给你自动的写好代码。

2、将这个代码粘贴到python中（我用的conda）

3、现在运行会报错。再做一下下面步骤就可以了。

新建txt
点击API请求中的“文档页面”
粘贴url,key这两行代码到txt
保存
修改txt文件名为：.cdsapirc
将文件复制或剪切到之前报错的目标路径下。

4、现在运行代码，就可以下载了。

（在我运行的时候，可能是选择数据很多，下载会出错，但是我一年一年下载就是好的，类似这样）

import cdsapi

dataset = "reanalysis-era5-land-monthly-means"
request = {
    'product_type': ['monthly_averaged_reanalysis_by_hour_of_day'],
    'variable': ['skin_temperature', 'potential_evaporation', 'total_precipitation'],
    'year': ['2024'],
    'month': ['01', '02', '03', '04', '05', '06', '07'],
    'time': ['00:00'],
    'data_format': 'netcdf',
    'download_format': 'unarchived'
}

client = cdsapi.Client()
client.retrieve(dataset, request).download()

最后经过我一年一年的下载（只需要修改年份就行）得到了以下数据文件（文件名是我自己改的）。

（5）数据下载总算结束了，但是一年的在一个文件中，为了方便提取数据，现在进行数据转换。（将.nc转换为.tiff文件，当时选择的数据是skin_temperature、potential_evaporation、total_precipitation三个指标的2010-2024年每月数据）

经过以下代码，转换为每个指标每月的数据。（文件夹名字最好英文吧）

import netCDF4 as nc
from osgeo import gdal, osr
import numpy as np
import os

# 定义写图像文件的函数
def write_img(filename, im_proj, im_geotrans, im_data):
    # 判断栅格数据的数据类型
    if 'int8' in im_data.dtype.name:
        datatype = gdal.GDT_Byte
    elif 'int16' in im_data.dtype.name:
        datatype = gdal.GDT_UInt16
    else:
        datatype = gdal.GDT_Float32
    
    # 判读数组维数
    if len(im_data.shape) == 3:
        im_bands, im_height, im_width = im_data.shape
    else:
        im_bands, (im_height, im_width) = 1, im_data.shape
    
    # 创建文件
    driver = gdal.GetDriverByName("GTiff")
    dataset = driver.Create(filename, im_width, im_height, im_bands, datatype)
    dataset.SetGeoTransform(im_geotrans)  # 写入仿射变换参数
    dataset.SetProjection(im_proj)  # 写入投影
    if im_bands == 1:
        dataset.GetRasterBand(1).WriteArray(im_data)  # 写入数组数据
    else:
        for i in range(im_bands):
            dataset.GetRasterBand(i + 1).WriteArray(im_data[i])
    del dataset

def nc_totif(input_dir, output_path):
    # 获取所有的nc文件
    nc_files = [f for f in os.listdir(input_dir) if f.endswith('.nc')]
    
    # 处理每一个nc文件
    for nc_file in nc_files:
        input_path = os.path.join(input_dir, nc_file)
        
        # 读取nc文件
        tep_data = nc.Dataset(input_path)
        
        # 获取nc文件中对应变量的信息
        lon_data = tep_data.variables["longitude"][:]
        lat_data = tep_data.variables["latitude"][:]
        
        # 影像的左上角&右下角坐标
        lonmin, latmax, lonmax, latmin = [lon_data.min(), lat_data.max(), lon_data.max(), lat_data.min()]
        
        # 分辨率计算
        num_lon = len(lon_data)
        num_lat = len(lat_data)
        lon_res = (lonmax - lonmin) / (float(num_lon) - 1)
        lat_res = (latmax - latmin) / (float(num_lat) - 1)
        
        # 定义投影
        proj = osr.SpatialReference()
        proj.ImportFromEPSG(4326)  # WGS84
        proj = proj.ExportToWkt()  # 重点，转成wkt格式
        
        geotransform = (lonmin, lon_res, 0.0, latmax, 0.0, -lat_res)
        
        # 获取数据
        variables = {
            'skt': 'skin_temperature',
            'pev': 'potential_evaporation',
            'tp': 'total_precipitation'
        }
        
        for var_key, var_name in variables.items():
            if var_key in tep_data.variables:
                data = tep_data.variables[var_key][:]
                
                # 处理每个月的数据
                time_dim = len(data)
                for month_index in range(time_dim):
                    # 输出文件路径
                    year = nc_file.split('_')[0]  # 从文件名中提取年份
                    month = month_index + 1
                    output_file = os.path.join(output_path, f"{year}_{month:02d}_{var_name}.tif")
                    
                    # 获取对应月份的数据
                    monthly_data = data[month_index]
                    write_img(output_file, proj, geotransform, monthly_data)
            else:
                print(f"Variable '{var_key}' not found in {nc_file}")

if __name__ == "__main__":
    # nc文件输入输出路径
    input_dir = "F:/ganhan/data"
    output_path = "F:/ganhan/data/datazh"
    # 确保输出路径存在
    os.makedirs(output_path, exist_ok=True)
    # 读取nc文件，转换为tif文件
    nc_totif(input_dir, output_path)

这是转换后的结果。（可以在Arcgis中打开看看）

（6）数据裁剪--下载的是全球数据，现在我要裁剪到我的范围，考虑到图像比较多，我就直接用python进行裁剪啦。（裁剪的时候我用的shp文件）代码如下：

import rasterio
from rasterio.mask import mask
import geopandas as gpd
import numpy as np
import os

def crop_tiff_with_shapefile(tiff_path, shapefile_path, output_path):
    # 读取 shapefile 数据
    shapefile = gpd.read_file(shapefile_path)
    
    # 读取 TIFF 文件
    with rasterio.open(tiff_path) as src:
        # 将 shapefile 转换为 GeoJSON 格式
        shapes = [feature['geometry'] for feature in shapefile.iterfeatures()]
        
        # 裁剪数据
        out_image, out_transform = mask(src, shapes, crop=True, all_touched=True)
        
        # 更新元数据
        out_meta = src.meta.copy()
        out_meta.update({
            "driver": "GTiff",
            "count": 1,
            "height": out_image.shape[1],
            "width": out_image.shape[2],
            "transform": out_transform,
            "nodata": 0  # 设置背景值
        })
        
        # 替换背景值为 0
        out_image = np.where(out_image == src.nodata, 0, out_image)
        
        # 保存裁剪后的 TIFF 文件
        with rasterio.open(output_path, "w", **out_meta) as dest:
            dest.write(out_image[0], 1)  # 写入数据

def process_all_tiffs(input_dir, shapefile_path, output_dir):
    # 确保输出路径存在
    os.makedirs(output_dir, exist_ok=True)
    
    # 获取所有 TIFF 文件
    tiff_files = [f for f in os.listdir(input_dir) if f.endswith('.tif')]
    
    # 对每个 TIFF 文件进行裁剪
    for tiff_file in tiff_files:
        input_path = os.path.join(input_dir, tiff_file)
        output_path = os.path.join(output_dir, tiff_file)
        crop_tiff_with_shapefile(input_path, shapefile_path, output_path)
        print(f"Processed {tiff_file}")

if __name__ == "__main__":
    # TIFF 文件和 shapefile 的路径
    input_dir = "F:/ganhan/data/datazh"  # TIFF 文件的输入路径
    output_dir = "F:/ganhan/caijian"  # 裁剪后的 TIFF 文件输出路径
    shapefile_path = "F:/ganhan/arcgis/sd.shp"  # Shapefile 文件路径
    
    # 处理所有 TIFF 文件
    process_all_tiffs(input_dir, shapefile_path, output_dir)

这是裁剪后的图像：

打开一个放在Arcgis里看看就是这样：

至此，数据下载，数据nc转tiff,数据裁剪就进行结束了，可以进行值提取等操作了。

标签：nc,ERA5,im,dir,output,path,data,预处理,下载
From： https://blog.csdn.net/weixin_53731307/article/details/141817603

ERA5数据下载及其预处理

1、ERA5数据下载

二、API下载及预处理

相关文章

赞助商

阅读排行