我正在尝试使用 python 将 JSON 数据集导入到我的 PostgreSQL 数据库,但在尝试导入 null 值时会抛出错误。
表的名称是 Loan_info。
我在 python 中尝试过此操作:-
for field in loan_info:
if loan_info[field] in ['Null', 'null', None]:
loan_info[field] = None
但是它抛出错误 - 预期的字符串或类似字节的对象,得到“NoneType”
这是一个示例 JSON 数据集
{"results": [
{
"loan_info": {
"loan_id": "",
"loan_status": "",
"loan_remarks": "",
"loan_approved": "",
"loan_offline_approved_on": "",
"approved_loan_amount_inr": "",
"disbursed_loan_amount_inr": "",
"subvention_cost_percentage": "",
"fldg_withhold_inr": "",
"date_of_approval": "",
"date_of_disbursement": "",
"actual_program_fee": "",
"scholarship_given": "",
"scholarship_percentage": "",
"is_remaining_fee_collected_offline": "",
"remaining_fee_collected_offline_amount": "",
"utr_details": ""
},
"loan_info": {
"loan_id": null,
"loan_status": null,
"loan_remarks": null,
"loan_approved": null,
"loan_offline_approved_on": null,
"approved_loan_amount_inr": null,
"disbursed_loan_amount_inr": null,
"subvention_cost_percentage": null,
"fldg_withhold_inr": null,
"date_of_approval": null,
"date_of_disbursement": null,
"actual_program_fee": null,
"scholarship_given": null,
"scholarship_percentage": null,
"is_remaining_fee_collected_offline": null,
"remaining_fee_collected_offline_amount": null,
"utr_details": null
}
}
]
}
数据库架构:-
CREATE TABLE IF NOT EXISTS loan_info (
loan_id TEXT default null,
loan_status TEXT default null,
loan_remarks TEXT default null,
loan_approved TEXT default null,
loan_offline_approved_on DATE default null,
approved_loan_amount_inr double precision default null,
disbursed_loan_amount_inr double precision default null,
subvention_cost_percentage double precision default null,
fldg_withhold_inr double precision default null,
date_of_approval DATE default null,
date_of_disbursement DATE default null,
actual_program_fee double precision default null,
scholarship_given TEXT default null,
scholarship_percentage double precision default null,
is_remaining_fee_collected_offline TEXT default null,
remaining_fee_collected_offline_amount double precision default null,
utr_details TEXT default null
);
这是我的 python 程序。
import psycopg2
import json
from datetime import datetime
import re
import requests
# Database connection
def connect_db():
return psycopg2.connect(
dbname="Loan",
user="abc",
password="abc",
host="localhost"
)
# Format date
def format_date(date_str):
if date_str and date_str.strip() != '':
if date_str.strip() == '00/00/0000':
return None
try:
return datetime.fromisoformat(date_str).strftime("%Y-%m-%d")
except ValueError:
try:
return datetime.strptime(date_str, '%d/%m/%Y').strftime("%Y-%m-%d")
except ValueError:
return None
return None
# Remove invisible characters
def remove_invisible_chars(value):
if isinstance(value, str):
return re.sub(r'[\u200b-\u200d\uFEFF]', '', value).strip() or None
return value
# Remove commas from numeric fields
def clean_numeric(value):
if isinstance(value, str):
return value.replace(',', '')
return value
# Check if a value is a valid number
def is_valid_number(value):
return re.match(r'^-?\d+(?:\.\d+)?$', value) is not None
# Prepare data for insertion
def prepare_data(data):
for key, value in data.items():
print(f"Processing key: {key}, value: {value}") # Debug statement
value = remove_invisible_chars(value)
value = clean_numeric(value)
if isinstance(value, dict):
data[key] = json.dumps(value)
elif isinstance(value, list):
data[key] = json.dumps(value)
elif isinstance(value, str):
if value == '':
data[key] = None
else:
data[key] = value
elif isinstance(value, (int, float)):
data[key] = value
elif value is None or value in ['Null', 'null']:
data[key] = None
# Specific checks for empty strings to None for integer fields
if key.endswith('_id') or key.endswith('_number') or key.endswith('_count'):
try:
data[key] = int(value) if value != '' else None
except (ValueError, TypeError):
data[key] = None
# Handle double precision and other numerical fields
if key.endswith('_amount') or key.endswith('_percentage') or key.endswith('_score') or key.endswith('_remaining_amount') or key == 'down_payment_remaining_fee':
try:
if is_valid_number(value):
data[key] = float(value)
else:
data[key] = None
except (ValueError, TypeError):
data[key] = None
# Ensure date fields with empty strings are set to None
if key.endswith('_date') or key.endswith('_on'):
data[key] = format_date(value)
# Insert data into table
def insert_data(cur, table, data, page, record_index):
if not data:
return
data = prepare_data(data)
columns = data.keys()
values = [data[column] for column in columns]
print(f"Inserting into {table}: dict_keys({columns}) with values: {values}") # Debug output
for column, value in zip(columns, values):
print(f"Column: {column}, Value: {value}, Type: {type(value)}")
insert_statement = f"INSERT INTO {table} ({', '.join(columns)}) VALUES ({', '.join(['%s'] * len(columns))})"
try:
cur.execute(insert_statement, values)
except Exception as e:
print(f"Error inserting data on page {page}, record {record_index}: {e}")
for column, value in zip(columns, values):
if value is None:
print(f"Column {column} has value {value}")
raise
def import_json_to_db(json_file_path, conn):
with open(json_file_path, 'r', encoding='utf-8') as file:
json_data = json.load(file)
with conn.cursor() as cur:
for record in json_data['results']:
loan_info = record.get('loan_info', {})
for field in loan_info:
if loan_info[field] in ['Null', 'null', None]:
loan_info[field] = None
insert_data(cur, 'loan_info', loan_info, page, record_index)
def main():
json_file_path = r"C:\Users\Desktop\DataMigration\response40"
try:
conn = connect_db()
conn.autocommit = False
import_json_to_db(json_file_path, conn)
conn.commit()
print("Data imported successfully.")
except Exception as e:
if conn:
conn.rollback()
print(f"An error occurred: {e}")
finally:
if conn:
conn.close()
if __name__ == "__main__":
main()
此错误的原因是正试图将 Python 的 None 直接插入到数据库中。虽然在 Python 中,可以将 None 赋值给变量以表示空值,但在 PostgreSQL 中,需要使用 NULL 来表示数据库中的空值。
修改的代码,不要将空值设置为 None ,而是使用 psycopg2 库提供的占位符。
以下是修改后的代码:
import psycopg2
import json
from datetime import datetime
import re
import requests
# ... [其他函数保持不变]
# 插入数据到表中
def insert_data(cur, table, data, page, record_index):
if not data:
return
data = prepare_data(data)
columns = data.keys()
values = [data[column] for column in columns]
print(f"插入到 {table}: dict_keys({columns}) 值为: {values}") # 调试输出
for column, value in zip(columns, values):
print(f"列: {column}, 值: {value}, 类型: {type(value)}")
# 使用 %s 占位符并传递一个包含值的元组
insert_statement = f"INSERT INTO {table} ({', '.join(columns)}) VALUES ({', '.join(['%s'] * len(columns))})"
try:
cur.execute(insert_statement, values)
except Exception as e:
print(f"在页面 {page},记录 {record_index} 插入数据时出错: {e}")
for column, value in zip(columns, values):
if value is None:
print(f"列 {column} 的值为 {value}")
raise
def import_json_to_db(json_file_path, conn):
with open(json_file_path, 'r', encoding='utf-8') as file:
json_data = json.load(file)
with conn.cursor() as cur:
for record in json_data['results']:
loan_info = record.get('loan_info', {})
# 不需要显式地将空值设置为 None,psycopg2 会处理它
insert_data(cur, 'loan_info', loan_info, 1, 1) # 替换为实际的页码和记录索引
# ... [其他函数保持不变]
在这个修改后的代码中,我们不再需要在
insert_data
函数中显式地将空字符串或 Python 的
None
转换为
NULL
。
psycopg2
会自动将 Python 中的
None
转换为 SQL 中的
NULL
。
这应该可以解决遇到的错误,并允许将包含空值的 JSON 数据成功导入到 PostgreSQL 数据库中。
标签:python,json,data-migration From: 78818389