"""Parse a free-text fund listing into a structured dictionary.

Input layout:

* line 1: name of the umbrella fund
* line 2: its LEI
* then any number of sub-funds; each one is introduced by a list marker
  ("- " or a number such as "1. ") followed by the sub-fund title, and is
  followed by its ISINs, one per line.  The marker of the next sub-fund may
  sit on its own line or on the same line as the last ISIN of the previous
  sub-fund ("LU2001709547 - TARENO FIXED INCOME FUND").

Shape of the result:

{
    'name': 'Variopartner SICAV',
    'lei': '529900LPCSV88817QH61',
    'sub_fund': [{
        'title': 'TARENO GLOBAL WATER SOLUTIONS FUND',
        'isin': ['LU2001709034', 'LU2057889995', 'LU2001709547']
    }, {
        'title': 'TARENO FIXED INCOME FUND',
        'isin': ['LU1299722972']
    }]
}

Neither the number of sub-funds nor the number of ISINs per sub-fund is
fixed; the parser is generic and handles 100 sub-funds (or more).
"""
import re

long_text = """
Variopartner SICAV
529900LPCSV88817QH61
- TARENO GLOBAL WATER SOLUTIONS FUND
LU2001709034
LU2057889995
LU2001709547 - TARENO FIXED INCOME FUND
LU1299722972 - TARENO GLOBAL EQUITY FUND
LU1299721909
LU1299722113
LU1299722030 - MIV GLOBAL MEDTECH FUND
LU0329630999
LU0329630130 - TARENO GLOBAL EQUITY FUND
LU0329630999
LU0329630130
LU0329630999
LU0329630130
"""

# A line that only introduces a sub-fund: "<marker> <title>".
# The marker is "-" or a multi-digit number followed by a literal dot, so
# "10." and "100." work and digits inside an ISIN are never mistaken for it.
_TITLE_LINE = re.compile(r"(?:-|\d+\.)\s+(.+)")

# A line carrying an ISIN (2 letters, 9 alphanumerics, 1 check digit),
# optionally followed by the marker and title of the NEXT sub-fund.
_ISIN_LINE = re.compile(r"([A-Z]{2}[A-Z0-9]{9}\d)(?:\s+(?:-|\d+\.)\s+(.+))?")


def parse_fund_text(text: str) -> dict:
    """Convert a fund listing into ``{'name', 'lei', 'sub_fund'}``.

    Args:
        text: The listing in the layout described in the module docstring.
            Blank lines and surrounding whitespace are ignored.

    Returns:
        A dict with the fund ``name``, its ``lei`` and ``sub_fund``, a list
        of ``{'title': str, 'isin': [str, ...]}`` dicts in input order.
        ISINs are kept exactly as listed (duplicates are not removed).

    Raises:
        ValueError: If the name/LEI header is missing, if an ISIN appears
            before the first sub-fund title, or if a line is neither a
            title line nor an ISIN line.
    """
    lines = [stripped for stripped in (raw.strip() for raw in text.splitlines())
             if stripped]
    if len(lines) < 2:
        raise ValueError("expected at least a fund name line and a LEI line")

    name, lei, *body = lines
    sub_funds = []
    for line in body:
        title_match = _TITLE_LINE.fullmatch(line)
        if title_match:
            sub_funds.append({'title': title_match.group(1), 'isin': []})
            continue

        isin_match = _ISIN_LINE.fullmatch(line)
        if isin_match is None or not sub_funds:
            raise ValueError("unexpected line in fund listing: {!r}".format(line))

        isin, next_title = isin_match.groups()
        sub_funds[-1]['isin'].append(isin)
        # The same line may already open the next sub-fund.
        if next_title is not None:
            sub_funds.append({'title': next_title, 'isin': []})

    return {'name': name, 'lei': lei, 'sub_fund': sub_funds}


dicts = parse_fund_text(long_text)
# Kept for backward compatibility: same list object as dicts['sub_fund'].
lists = dicts['sub_fund']
print(dicts)