ICESat-2数据处理的方式一般为将光子数据投影到沿轨距离和高程的二维空间。如下图:
ATL03数据读取
H5是一种数据存储结构,读取原理就是按照该结构获取数据,这里给出两种读取方式。
ATL03的数据字典:ATL03 Product Data Dictionary (nsidc.org)
使用pandas
import warnings
import pandas as pd
def read_hdf5_atl03_beam_pandas(filename, beam, verbose=False):
    """Read selected ATL03 variables of one beam through ``pandas.HDFStore``.

    Args:
        filename (str): Path of the ATL03 HDF5 file.
        beam (str): Beam group name; one of ``gt1l``, ``gt1r``, ``gt2l``,
            ``gt2r``, ``gt3l``, ``gt3r``.
        verbose (bool): Currently unused by this reader.

    Returns:
        dict | None: ``{'heights': {...}, 'geolocation': {...}}`` mapping each
        requested dataset name to the data read from the file, or ``None``
        (after printing a message) when ``beam`` is not a valid beam name.
    """
    beams = ('gt1l', 'gt1r', 'gt2l', 'gt2r', 'gt3l', 'gt3r')
    # Validate first so that a wrong beam name never opens a file handle.
    if beam not in beams:
        print('请填入正确的光束代码')
        return None

    heights_keys = ('dist_ph_across', 'dist_ph_along', 'h_ph',
                    'lat_ph', 'lon_ph', 'signal_conf_ph')
    geolocation_keys = ('ref_elev', 'ph_index_beg', 'segment_id',
                        'segment_ph_cnt', 'segment_dist_x', 'segment_length')

    atl03_mds = {'heights': {}, 'geolocation': {}}
    # The context manager closes the store even when a dataset read raises.
    with pd.HDFStore(filename, mode='r') as h5_store:
        root = h5_store.root
        # Warnings emitted while walking the HDF5 tree are silenced on purpose.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            beam_group = root[beam]
            # ICESat-2 heights group
            for key in heights_keys:
                atl03_mds['heights'][key] = beam_group['heights'][key][:]
            # ICESat-2 geolocation group
            for key in geolocation_keys:
                atl03_mds['geolocation'][key] = beam_group['geolocation'][key][:]
    return atl03_mds
使用h5py
import os
import h5py
import re
def read_hdf5_atl03_beam_h5py(filename, beam, verbose=False):
    """Read the raw ATL03 photon data of one beam with h5py.

    Args:
        filename (str): Path of the HDF5 file; ``~`` is expanded.
        beam (str): Beam group name; must be a top-level group of the file
            whose name matches ``gt<digit><l|r>``.
        verbose (bool): When true, print the file name, the top-level keys
            and the keys of the ``METADATA`` group.

    Returns:
        dict | None: A dict with the keys ``heights``, ``geolocation`` and
        ``bckgrd_atlas``; each maps every member name of that group to the
        data read with ``[:]``. ``None`` (after printing a message) when
        ``beam`` is not one of the beams found in the file.
    """
    # The context manager guarantees the file is closed on every exit path,
    # including the early return for an unknown beam and read errors.
    with h5py.File(os.path.expanduser(filename), 'r') as file_id:
        if verbose:
            print(file_id.filename)
            print(list(file_id.keys()))
            print(list(file_id['METADATA'].keys()))

        # Beam groups actually present in this file.
        beams = [k for k in file_id.keys() if re.match(r'gt\d[lr]', k)]
        if beam not in beams:
            print('请填入正确的光束代码')
            return None

        # ``[:]`` copies each dataset into memory, so the returned arrays
        # stay usable after the file has been closed.
        atl03_mds = {}
        for group in ('heights', 'geolocation', 'bckgrd_atlas'):
            atl03_mds[group] = {
                key: val[:] for key, val in file_id[beam][group].items()
            }
    return atl03_mds
重建沿轨道距离
在读取ICESat-2 ATL03数据后,我们需要根据分段信息重建每个光子的沿轨道距离,已知数据如下:
- ATL03每个分段内的光子都有一个沿轨道距离(`dist_ph_along`),该距离以当前分段的起点为起算点。
- ATL03每个分段有一个沿轨道距离(`segment_dist_x`),该距离以赤道交叉点为起算点,即该分段真正的沿轨道距离。
- 当前分段内每个光子的相对沿轨道距离 + 当前分段的沿轨道距离 = 每个光子的真正沿轨道距离。
代码如下:
import numpy as np
def get_atl03_x_atc(atl03_mds):
    """Add per-photon along-track and across-track coordinates in place.

    For every segment with a valid first-photon index, each photon of that
    segment receives:

    * ``heights['x_atc']``: ``dist_ph_along`` + the segment's
      ``segment_dist_x``;
    * ``heights['y_atc']``: ``dist_ph_across``;
    * ``geolocation['ref_elev_all']``: the segment's ``ref_elev``.

    Photons not covered by any valid segment keep ``NaN`` in ``x_atc`` and
    ``y_atc`` and ``0`` in ``ref_elev_all``.

    Args:
        atl03_mds (dict): Must provide ``heights`` (``h_ph``,
            ``dist_ph_along``, ``dist_ph_across``) and ``geolocation``
            (``ph_index_beg``, ``segment_ph_cnt``, ``segment_dist_x``,
            ``ref_elev``). The dict is modified in place.

    Returns:
        None
    """
    heights = atl03_mds['heights']
    geolocation = atl03_mds['geolocation']

    # x_atc is float64 so that adding a segment offset to the small
    # within-segment distance is not rounded to the precision of the inputs
    # (NOTE(review): dist_ph_along is presumably float32 -- confirm).
    heights['x_atc'] = np.full(heights['h_ph'].shape, np.nan, dtype=np.float64)
    # np.nan instead of np.NaN: the latter alias was removed in NumPy 2.0.
    heights['y_atc'] = np.zeros_like(heights['h_ph']) + np.nan
    geolocation['ref_elev_all'] = np.zeros_like(heights['h_ph'])

    # First photon of each segment converted to 0-based indexing.
    # ATL03 is 1-based with invalid == 0, so invalid becomes -1 here.
    first_photon = geolocation['ph_index_beg'] - 1
    photon_count = geolocation['segment_ph_cnt']
    segment_distance = geolocation['segment_dist_x']
    segment_ref_elev = geolocation['ref_elev']

    # Each valid segment is handled on its own, so the last segment of a run
    # and segments without a valid neighbour are filled as well.
    for j in np.flatnonzero(first_photon >= 0):
        start = int(first_photon[j])
        stop = start + int(photon_count[j])
        along = heights['dist_ph_along'][start:stop].astype(np.float64)
        heights['x_atc'][start:stop] = along + segment_distance[j]
        heights['y_atc'][start:stop] = heights['dist_ph_across'][start:stop]
        geolocation['ref_elev_all'][start:stop] = segment_ref_elev[j]
ATL03数据截取
在处理ATL03时,我们一般都会获取经过研究区域内的光子数据,因此需要对数据进行截取操作,代码如下:
from glob import glob
from readers.get_ATL03_x_atc import get_atl03_x_atc
from readers.read_HDF5_ATL03 import read_hdf5_atl03_beam, read_hdf5_atl03_coordinate
def read_data(filepath, beam, mask_lat, mask_lon):
    """Read one beam and return the photons inside the requested window.

    The first file matching ``filepath`` is read with
    ``read_hdf5_atl03_beam``, along-track coordinates are added with
    ``get_atl03_x_atc`` and the photons are then filtered.

    :param filepath: glob pattern (or plain path) of the h5 file; the first
        match is used
    :param beam: beam name passed on to ``read_hdf5_atl03_beam``
    :param mask_lat: latitude range; photons with ``lat_ph`` between its
        minimum and maximum (inclusive) are kept
    :param mask_lon: ``None`` for no further filtering, otherwise a pair
        ``(lower, upper)``. Despite its name it is compared against the
        along-track distance ``x_atc``, not against longitude. Either entry
        may be ``None`` to leave that side open.
    :return: tuple ``(x_atc, h_ph, signal_conf_ph, lat_ph, lon_ph,
        ref_elev_all)`` restricted to the selected photons
    :raises IndexError: if no file matches ``filepath``
    """
    matches = glob(filepath)
    if not matches:
        # Indexing the empty glob result already raised IndexError; the type
        # is kept and only the message is made explicit.
        raise IndexError('no ATL03 file matches {!r}'.format(filepath))

    is2_atl03_mds = read_hdf5_atl03_beam(matches[0], beam=beam, verbose=False)
    # Adds heights['x_atc'] and geolocation['ref_elev_all'] in place.
    get_atl03_x_atc(is2_atl03_mds)

    heights = is2_atl03_mds['heights']
    keep = (heights['lat_ph'] >= min(mask_lat)) & (heights['lat_ph'] <= max(mask_lat))

    if mask_lon is not None:
        lower, upper = mask_lon[0], mask_lon[1]
        if lower is not None and upper is not None:
            # Both bounds given: accept them in either order.
            lower, upper = min(mask_lon), max(mask_lon)
        # Each bound is applied independently, so (None, None) simply means
        # "no along-track restriction" instead of failing inside min().
        if lower is not None:
            keep = keep & (heights['x_atc'] >= lower)
        if upper is not None:
            keep = keep & (heights['x_atc'] <= upper)

    return (
        heights['x_atc'][keep],
        heights['h_ph'][keep],
        heights['signal_conf_ph'][keep],
        heights['lat_ph'][keep],
        heights['lon_ph'][keep],
        is2_atl03_mds['geolocation']['ref_elev_all'][keep],
    )
def read_all_beam_coordinate(filepath, mask_lat, mask_lon):
    """Read the coordinates of all beams, optionally clipped to a window.

    The first file matching ``filepath`` is read with
    ``read_hdf5_atl03_coordinate``. For every beam in the result the ``lat``
    and ``lon`` entries are replaced by the points inside the given ranges.

    :param filepath: glob pattern (or plain path) of the h5 file; the first
        match is used
    :param mask_lat: latitude range (minimum and maximum are taken,
        inclusive), or ``None`` for no latitude restriction
    :param mask_lon: longitude range (minimum and maximum are taken,
        inclusive), or ``None`` for no longitude restriction
    :return: the dict returned by ``read_hdf5_atl03_coordinate``; returned
        unfiltered when both masks are ``None``
    :raises IndexError: if no file matches ``filepath``
    """
    matches = glob(filepath)
    if not matches:
        # Indexing the empty glob result already raised IndexError; the type
        # is kept and only the message is made explicit.
        raise IndexError('no ATL03 file matches {!r}'.format(filepath))

    coordinates = read_hdf5_atl03_coordinate(matches[0])

    if mask_lat is None and mask_lon is None:
        # No window requested: hand back everything.
        return coordinates

    for beam in coordinates:
        lat = coordinates[beam]['lat']
        lon = coordinates[beam]['lon']
        # At least one mask is set here; a missing one imposes no limit
        # instead of failing inside min()/max().
        keep = None
        if mask_lat is not None:
            keep = (lat >= min(mask_lat)) & (lat <= max(mask_lat))
        if mask_lon is not None:
            in_lon = (lon >= min(mask_lon)) & (lon <= max(mask_lon))
            keep = in_lon if keep is None else keep & in_lon
        coordinates[beam]['lat'] = lat[keep]
        coordinates[beam]['lon'] = lon[keep]
    return coordinates
数据可视化
使用沿轨道距离和高程数据绘制散点图,示例代码如下:
def save2file(act, h, conf, lat, lon, out_path='result/points_origin.csv'):
    """Save the photons of the study area to a CSV file.

    One row is written per photon with the columns along-track distance,
    elevation, latitude, longitude and confidence (the header strings are
    kept exactly as before so existing readers of the file keep working).
    An existing file is overwritten.

    :param act: along-track distance of each photon
    :param h: elevation of each photon
    :param conf: confidence of each photon
    :param lat: latitude of each photon
    :param lon: longitude of each photon
    :param out_path: destination CSV path; defaults to the location that was
        previously hard-coded
    """
    import os

    # to_csv does not create missing directories, so make sure the parent
    # exists (a bare file name has no parent to create).
    parent = os.path.dirname(out_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # Rows are zipped in the same order as the column names below.
    points = list(zip(act, h, lat, lon, conf))
    data = pd.DataFrame(points, columns=['沿轨道距离', '高程', '维度', '经度', '置信度'])
    data.to_csv(out_path, mode='w', index=False)
if __name__ == '__main__':
    # plt is needed for the scatter plot below and is not imported at file
    # level in the visible source, so it is imported here.
    import matplotlib.pyplot as plt

    # Example ATL03 granule, beam and latitude window of the study area.
    filepath = r'D:\Users\SongW\Downloads\ATL03_20190222135159_08570207_005_01.h5'
    beam = 'gt3l'
    mask_lat = [16.533, 16.550]

    # mask_lon=None: no along-track restriction, only the latitude window.
    act, h, conf, lat, lon, ref_elev = read_data(filepath, beam, mask_lat, None)
    save2file(act, h, conf, lat, lon)

    # Photon cloud: along-track distance against elevation.
    plt.scatter(act, h)
    plt.show()
输出图像如下:
项目源码
sx-code - icesat-2-atl03 (github.com)
标签:读取,val,ATL03,heights,beam,atl03,segment,ICESat From: https://www.cnblogs.com/sw-code/p/18161987