# 格网算法计算数据集区域数据密集度
import time
import random
import numpy as np
import pandas as pd
# Simulated dataset
def create_data(n=300000, x_range=(0, 300000), y_range=(-1500, 1500)):
    """Generate ``n`` random integer points.

    Args:
        n: Number of points to generate.
        x_range: ``(start, stop)`` arguments forwarded to
            ``random.randrange`` for the x coordinate (``stop`` exclusive).
        y_range: ``(start, stop)`` arguments forwarded to
            ``random.randrange`` for the y coordinate (``stop`` exclusive).

    Returns:
        A tuple ``(data_x, data_y, data)`` where ``data_x`` and ``data_y``
        are the coordinate lists and ``data`` is a list of mutable
        ``[x, y]`` pairs in the same order.
    """
    # x is drawn before y for every point, so a seeded ``random`` yields the
    # same sequence regardless of how the three lists are assembled.
    data = [
        [random.randrange(*x_range), random.randrange(*y_range)]
        for _ in range(n)
    ]
    data_x = [point[0] for point in data]
    data_y = [point[1] for point in data]
    return data_x, data_y, data
# Compute the data density of every cell of a regular grid
def calculate_density(gridSize, bounds, data=None):
    """Count points per square grid cell and convert the counts to densities.

    Args:
        gridSize: Edge length of one square cell; must be positive.
        bounds: ``[[x_min, y_min], [x_max, y_max]]`` of the gridded region.
            Both edges are inclusive: a point lying exactly on an upper
            bound is counted in the last cell along that axis.
        data: Optional list of mutable ``[x, y]`` points. When omitted, a
            random dataset is generated with ``create_data()``.

    Returns:
        A tuple ``(densities, data)``. ``densities`` is a 2-D float array
        indexed ``[x_cell, y_cell]`` holding ``count / gridSize**2``.
        ``data`` is the same list object that was processed; every point
        inside ``bounds`` has the density of its cell appended in place,
        points outside ``bounds`` are left untouched.

    Raises:
        ValueError: If ``gridSize`` is not positive.
    """
    if gridSize <= 0:
        raise ValueError("gridSize must be positive")
    if data is None:
        _, _, data = create_data()

    x_min, y_min = bounds[0][0], bounds[0][1]
    x_max, y_max = bounds[1][0], bounds[1][1]

    # Number of cells per axis: one fewer than the number of cell edges.
    n_x = max(len(np.arange(x_min, x_max + gridSize, gridSize)) - 1, 0)
    n_y = max(len(np.arange(y_min, y_max + gridSize, gridSize)) - 1, 0)
    counts = np.zeros((n_x, n_y), dtype=np.int64)
    cell_area = gridSize * gridSize

    # Degenerate region without any cell: nothing can be assigned.
    if counts.size == 0:
        return counts / cell_area, data

    xs = np.asarray([point[0] for point in data])
    ys = np.asarray([point[1] for point in data])
    in_bounds = (
        (xs >= x_min) & (xs <= x_max) & (ys >= y_min) & (ys <= y_max)
    )

    # Cell index of every in-bounds point. Clamping keeps points that sit
    # exactly on the upper bound inside the last cell instead of indexing
    # one past the end of the grid.
    x_index = np.minimum(
        ((xs[in_bounds] - x_min) // gridSize).astype(np.intp), n_x - 1
    )
    y_index = np.minimum(
        ((ys[in_bounds] - y_min) // gridSize).astype(np.intp), n_y - 1
    )

    # Unbuffered accumulation so repeated (x, y) cells are each counted.
    np.add.at(counts, (x_index, y_index), 1)

    # Density of each cell = number of points / cell area.
    densities = counts / cell_area

    # Attach the density of its cell to every in-bounds point.
    point_densities = densities[x_index, y_index]
    for position, density in zip(np.flatnonzero(in_bounds), point_densities):
        data[position].append(density)
    return densities, data
if __name__ == "__main__":
    # Time one full run over the demo region. perf_counter() is monotonic,
    # so the measured interval cannot be distorted by system clock changes.
    start_time = time.perf_counter()
    densities, data = calculate_density(100,
                                        [[0, -1500], [300000, 1500]])
    end_time = time.perf_counter()
    # The printed label means "elapsed time" (in seconds).
    print("消耗的时间:", end_time - start_time)
    print(densities)
# 标签:index,gridSize,point,python,demo,格网,bounds,time,data
# From: https://www.cnblogs.com/shallow-dreamer/p/17135005.html