如何将 300 度等距柱状全景图像转换为立方体面?

我想使用 Python 中的 OpenCV 将 300 度等距柱状全景图像转换为立方体面。我找到了 360 度图像的代码。如何修改它以处理 300 度图像?

import cv2
import numpy as np

def equirectangular_to_cube(img, cube_size):
    """Resample a full 360-degree equirectangular panorama into six cube faces.

    Uses inverse mapping: for every pixel of every cube face the viewing
    direction is computed, converted to longitude/latitude, and the panorama
    is sampled at that position with bilinear interpolation.

    Conventions (x to the right, y up, z forward):
      * image columns run from longitude -pi (left edge) to +pi (right edge),
        so the image centre looks along +z, i.e. at the front face;
      * image rows run from latitude +pi/2 (top) to -pi/2 (bottom).

    Args:
        img: Panorama as an ``(H, W)`` or ``(H, W, C)`` NumPy array, e.g. the
            result of ``cv2.imread``.
        cube_size: Edge length of each square cube face, in pixels.

    Returns:
        Array of shape ``(cube_size, cube_size * 6) + img.shape[2:]`` and
        dtype ``img.dtype`` holding the faces side by side in the order
        front, right, back, left, top, bottom.

    Raises:
        ValueError: If ``img`` is ``None`` or empty, or ``cube_size < 1``.
    """
    if img is None or img.size == 0:
        raise ValueError("img is empty (cv2.imread returns None on failure)")
    if cube_size < 1:
        raise ValueError("cube_size must be a positive integer")

    h, w = img.shape[:2]

    # Face-plane coordinates of every output pixel centre, in (-1, 1):
    # `a` grows to the right, `b` grows downwards.
    ticks = (np.arange(cube_size, dtype=np.float64) + 0.5) * 2.0 / cube_size - 1.0
    a, b = np.meshgrid(ticks, ticks)
    one = np.ones_like(a)

    # Viewing direction (x, y, z) of each face pixel, in output-strip order.
    directions = (
        (a, -b, one),     # front  (+z)
        (one, -b, -a),    # right  (+x)
        (-a, -b, -one),   # back   (-z)
        (-one, -b, a),    # left   (-x)
        (a, one, b),      # top    (+y), its lower edge touches the front face
        (a, -one, -b),    # bottom (-y), its upper edge touches the front face
    )

    cube_faces = np.zeros(
        (cube_size, cube_size * 6) + img.shape[2:], dtype=img.dtype
    )
    src = img.astype(np.float64)
    round_result = np.issubdtype(img.dtype, np.integer)
    # Lets the 2-D interpolation weights broadcast over a channel axis.
    weight_shape = (cube_size, cube_size) + (1,) * (img.ndim - 2)

    for index, (x, y, z) in enumerate(directions):
        lon = np.arctan2(x, z)                 # -pi .. pi, 0 is straight ahead
        lat = np.arctan2(y, np.hypot(x, z))    # -pi/2 .. pi/2

        # Same convention as linspace(-pi, pi, w) / linspace(pi/2, -pi/2, h):
        # the first/last pixel sits exactly on the angular limits.
        u = np.clip((lon + np.pi) / (2.0 * np.pi) * (w - 1), 0, w - 1)
        v = np.clip((np.pi / 2.0 - lat) / np.pi * (h - 1), 0, h - 1)

        # Bilinear interpolation between the four surrounding source pixels.
        u0 = np.floor(u).astype(np.intp)
        v0 = np.floor(v).astype(np.intp)
        u1 = np.minimum(u0 + 1, w - 1)
        v1 = np.minimum(v0 + 1, h - 1)
        fu = (u - u0).reshape(weight_shape)
        fv = (v - v0).reshape(weight_shape)
        upper = src[v0, u0] * (1.0 - fu) + src[v0, u1] * fu
        lower = src[v1, u0] * (1.0 - fu) + src[v1, u1] * fu
        face = upper * (1.0 - fv) + lower * fv

        if round_result:
            face = np.rint(face)
        start = index * cube_size
        cube_faces[:, start:start + cube_size] = face.astype(img.dtype)

    return cube_faces

# Usage example
image_path = 'path/to/300_degree_image.jpg'
cube_size = 512
img = cv2.imread(image_path)
# cv2.imread does not raise on a missing/unreadable file; it returns None,
# which would otherwise surface later as an obscure AttributeError.
if img is None:
    raise FileNotFoundError("Could not read image: " + image_path)
cube_faces = equirectangular_to_cube(img, cube_size)

# Save the cube faces as separate images.
# The strip holds six equally wide faces side by side, in this order.
front, right, back, left, top, bottom = np.hsplit(cube_faces, 6)

cv2.imwrite("front.jpg", front)
cv2.imwrite("right.jpg", right)
cv2.imwrite("back.jpg", back)
cv2.imwrite("left.jpg", left)
cv2.imwrite("top.jpg", top)
cv2.imwrite("bottom.jpg", bottom)

它使用 np.linspace(-np.pi, np.pi, num=w, dtype=np.float32) 定义 360 度图像的映射坐标。我如何修改它以适应视野的缩小?



# convert using an inverse transformation
def convertBack(imgIn,imgOut):
    """Fill a cube-map image from a 300-degree equirectangular panorama.

    For every output pixel the matching 3D point is obtained from
    ``outImgToXYZ``, converted to spherical angles and looked up in the
    source panorama with bilinear interpolation.  The source is treated as
    the left 300/360 part of a full panorama; output pixels that fall in the
    missing 60 degrees are written as black.

    Output layout (taken from the index arithmetic below): four faces side
    by side in the middle band (0 back, 1 left, 2 front, 3 right), with the
    top face above and the bottom face below the front face.

    Args:
        imgIn: Source panorama.  Assumed to be a PIL image (uses ``.size``
            and ``.load()``) with RGB-like pixels -- TODO confirm.
        imgOut: Destination image, modified in place through its pixel
            access object.  Assumed to be 4*edge wide and 3*edge high --
            TODO confirm against the caller.

    NOTE(review): ``outImgToXYZ``, ``atan2``, ``hypot``, ``pi`` and ``floor``
    are expected to be provided elsewhere in the file.
    """
    inSize = imgIn.size
    outSize = imgOut.size
    inPix = imgIn.load()
    outPix = imgOut.load()
    # Width the panorama would have if it covered the full 360 degrees.
    extendedSize = [int(360/300 * inSize[0]), inSize[1]]
    edge = int(extendedSize[0]/4)   # theoretical length of each edge
    maxU = inSize[0]-1
    maxV = inSize[1]-1
    for i in range(outSize[0]):
        face = int(i/edge) # 0 - back, 1 - left 2 - front, 3 - right
        if face==2:
            # The front column also carries the top and bottom faces.
            rng = range(0,edge*3)
        else:
            rng = range(edge,edge*2)
        for j in rng:
            if j<edge:
                face2 = 4 # top
            elif j>=2*edge:
                face2 = 5 # bottom
            else:
                face2 = face
            (x,y,z) = outImgToXYZ(i,j,face2,edge)
            theta = atan2(y,x) # range -pi to pi
            rad = hypot(x,y)
            phi = atan2(z,rad) # range -pi/2 to pi/2
            # source img coords
            uf = ( 2.0*edge*(theta + pi)/pi )
            vf = ( 2.0*edge * (pi/2 - phi)/pi)

            if uf < inSize[0]:
                # Use bilinear interpolation between the four surrounding pixels
                ui = int(floor(uf))  # coord of pixel to bottom left
                vi = int(floor(vf))
                mu = uf-ui      # fraction of way across pixel
                nu = vf-vi
                # Clamp all four corners so that the last row/column of the
                # source never indexes outside the image.
                u2 = max(0, min(ui+1, maxU))
                v2 = max(0, min(vi+1, maxV))
                ui = max(0, min(ui, maxU))
                vi = max(0, min(vi, maxV))
                # Pixel values of four corners
                A = inPix[ui,vi]
                B = inPix[u2,vi]
                C = inPix[ui,v2]
                D = inPix[u2,v2]
                # interpolate
                (r,g,b) = (
                    A[0]*(1-mu)*(1-nu) + B[0]*(mu)*(1-nu) + C[0]*(1-mu)*nu+D[0]*mu*nu,
                    A[1]*(1-mu)*(1-nu) + B[1]*(mu)*(1-nu) + C[1]*(1-mu)*nu+D[1]*mu*nu,
                    A[2]*(1-mu)*(1-nu) + B[2]*(mu)*(1-nu) + C[2]*(1-mu)*nu+D[2]*mu*nu )
            else:
                # Direction lies in the part of the sphere the 300-degree
                # panorama does not cover.
                (r,g,b) = (0,0,0)
            outPix[i,j] = (int(round(r)),int(round(g)),int(round(b)))


# Size the 300-degree panorama would have if it spanned a full 360 degrees:
# the width is scaled up, the height is kept.
extendedSize = [int(360/300 * inSize[0]), inSize[1]]
# Theoretical length of each cube edge: a quarter of the extended width.
edge = int(extendedSize[0]/4)




# convert using an inverse transformation
def convertBack(imgIn,imgOut):
    """Fill a cube-map image from a 300-degree equirectangular panorama.

    For every output pixel the matching 3D point is obtained from
    ``outImgToXYZ``, converted to spherical angles and looked up in the
    source panorama with bilinear interpolation.  The source is treated as
    the left 300/360 part of a full panorama; output pixels that fall in the
    missing 60 degrees are written as black.

    Output layout (taken from the index arithmetic below): four faces side
    by side in the middle band (0 back, 1 left, 2 front, 3 right), with the
    top face above and the bottom face below the front face.

    Args:
        imgIn: Source panorama.  Assumed to be a PIL image (uses ``.size``
            and ``.load()``) with RGB-like pixels -- TODO confirm.
        imgOut: Destination image, modified in place through its pixel
            access object.

    NOTE(review): ``outImgToXYZ``, ``atan2``, ``hypot``, ``pi`` and ``floor``
    are expected to be provided elsewhere in the file.  ``edge`` is passed to
    ``outImgToXYZ`` as a float here -- confirm the helper accepts that.
    """
    inSize = imgIn.size
    outSize = imgOut.size
    inPix = imgIn.load()
    outPix = imgOut.load()
    # Size the panorama would have if it covered the full 360 degrees.
    extendedSize = (int(360/300 * inSize[0]), inSize[1])
    edge = extendedSize[0]/4   # theoretical length of edge
    maxU = inSize[0]-1
    maxV = inSize[1]-1
    for i in range(outSize[0]):
        face = int(i/edge) # 0 - back, 1 - left 2 - front, 3 - right
        # Rows are capped at the output height so that no pixel is written
        # outside the image (or repeatedly onto its last row).
        if face==2:
            # The front column also carries the top and bottom faces.
            rng = range(0, min(int(edge*3), outSize[1]))
        else:
            rng = range(int(edge), min(int(edge*2), outSize[1]))
        for j in rng:
            if j<edge:
                face2 = 4 # top
            elif j>=2*edge:
                face2 = 5 # bottom
            else:
                face2 = face
            (x,y,z) = outImgToXYZ(i,j,face2,edge)
            theta = atan2(y,x) # range -pi to pi
            rad = hypot(x,y)
            phi = atan2(z,rad) # range -pi/2 to pi/2
            # source img coords
            uf = ( 2.0*edge*(theta + pi)/pi )
            vf = ( 2.0*edge * (pi/2 - phi)/pi)
            if uf < inSize[0] :
                # Use bilinear interpolation between the four surrounding pixels
                ui = int(floor(uf))  # coord of pixel to bottom left
                vi = int(floor(vf))
                mu = uf-ui      # fraction of way across pixel
                nu = vf-vi
                # Clamp instead of wrapping: in a 300-degree image the first
                # and last columns are not neighbours, so a modulo wrap would
                # blend in pixels from the opposite edge.
                u2 = max(0, min(ui+1, maxU))   # coords of pixel to top right
                v2 = max(0, min(vi+1, maxV))
                ui = max(0, min(ui, maxU))
                vi = max(0, min(vi, maxV))
                # Pixel values of four corners
                A = inPix[ui,vi]
                B = inPix[u2,vi]
                C = inPix[ui,v2]
                D = inPix[u2,v2]
                # interpolate
                (r,g,b) = (
                    A[0]*(1-mu)*(1-nu) + B[0]*(mu)*(1-nu) + C[0]*(1-mu)*nu+D[0]*mu*nu,
                    A[1]*(1-mu)*(1-nu) + B[1]*(mu)*(1-nu) + C[1]*(1-mu)*nu+D[1]*mu*nu,
                    A[2]*(1-mu)*(1-nu) + B[2]*(mu)*(1-nu) + C[2]*(1-mu)*nu+D[2]*mu*nu )
            else:
                # Direction lies in the part of the sphere the 300-degree
                # panorama does not cover.
                (r,g,b) = (0,0,0)
            outPix[i,j] = (int(round(r)),int(round(g)),int(round(b)))

以下是如何使用 Python 中的 OpenCV 将 300 度等距柱状全景图像转换为立方体面的步骤,以及如何调整提供的代码以实现此目的的说明:

1. 了解坐标系和投影

  • 等距柱状全景 :此类全景使用以水平角度 (θ) 和垂直角度 (φ) 表示的球面坐标。图像的宽度对应于 360° 的 θ,高度对应于 180° 的 φ。
  • 立方体贴图 :立方体贴图由六个正方形面组成,这些面代表了围绕观察者展开的立方体。每个面代表 90° x 90° 的视野。
  • 投影 :从等距柱状坐标到立方体面的转换需要将球面坐标投影到立方体的六个面上。

2. 调整代码

提供的代码适用于 360 度全景图。主要的变化涉及调整水平视野和修改坐标映射。


import cv2
import numpy as np

def equirectangular_to_cube(img, cube_size, fov):
    """Resample a partial equirectangular panorama into six cube faces.

    Uses inverse mapping: for every pixel of every cube face the viewing
    direction is computed, converted to longitude/latitude, and the panorama
    is sampled at that position with bilinear interpolation.  Directions
    whose longitude lies outside the panorama's horizontal field of view
    have no source data and are left black (zero).

    Conventions (x to the right, y up, z forward):
      * image columns run from longitude -fov/2 (left edge) to +fov/2
        (right edge), so the image centre looks along +z (the front face);
      * image rows run from latitude +pi/2 (top) to -pi/2 (bottom).

    Args:
        img: Panorama as an ``(H, W)`` or ``(H, W, C)`` NumPy array, e.g. the
            result of ``cv2.imread``.
        cube_size: Edge length of each square cube face, in pixels.
        fov: Horizontal field of view covered by ``img``, in degrees
            (``0 < fov <= 360``; 300 for a 300-degree panorama).

    Returns:
        Array of shape ``(cube_size, cube_size * 6) + img.shape[2:]`` and
        dtype ``img.dtype`` holding the faces side by side in the order
        front, right, back, left, top, bottom.

    Raises:
        ValueError: If ``img`` is ``None`` or empty, ``cube_size < 1`` or
            ``fov`` is outside ``(0, 360]``.
    """
    if img is None or img.size == 0:
        raise ValueError("img is empty (cv2.imread returns None on failure)")
    if cube_size < 1:
        raise ValueError("cube_size must be a positive integer")
    if not 0 < fov <= 360:
        raise ValueError("fov must be in the range (0, 360] degrees")

    h, w = img.shape[:2]
    # Degrees -> radians.  Written this way so that fov == 360 yields exactly
    # np.pi, the largest magnitude np.arctan2 can return.
    half_fov = np.pi * (fov / 360.0)

    # Face-plane coordinates of every output pixel centre, in (-1, 1):
    # `a` grows to the right, `b` grows downwards.
    ticks = (np.arange(cube_size, dtype=np.float64) + 0.5) * 2.0 / cube_size - 1.0
    a, b = np.meshgrid(ticks, ticks)
    one = np.ones_like(a)

    # Viewing direction (x, y, z) of each face pixel, in output-strip order.
    directions = (
        (a, -b, one),     # front  (+z)
        (one, -b, -a),    # right  (+x)
        (-a, -b, -one),   # back   (-z)
        (-one, -b, a),    # left   (-x)
        (a, one, b),      # top    (+y), its lower edge touches the front face
        (a, -one, -b),    # bottom (-y), its upper edge touches the front face
    )

    cube_faces = np.zeros(
        (cube_size, cube_size * 6) + img.shape[2:], dtype=img.dtype
    )
    src = img.astype(np.float64)
    round_result = np.issubdtype(img.dtype, np.integer)
    # Lets the 2-D interpolation weights broadcast over a channel axis.
    weight_shape = (cube_size, cube_size) + (1,) * (img.ndim - 2)

    for index, (x, y, z) in enumerate(directions):
        lon = np.arctan2(x, z)                 # -pi .. pi, 0 is straight ahead
        lat = np.arctan2(y, np.hypot(x, z))    # -pi/2 .. pi/2
        covered = np.abs(lon) <= half_fov      # inside the panorama's FOV

        # Same convention as linspace(-fov/2, fov/2, w) and
        # linspace(pi/2, -pi/2, h): the first/last pixel sits exactly on the
        # angular limits.  Clipping only affects pixels blanked below.
        u = np.clip((lon + half_fov) / (2.0 * half_fov) * (w - 1), 0, w - 1)
        v = np.clip((np.pi / 2.0 - lat) / np.pi * (h - 1), 0, h - 1)

        # Bilinear interpolation between the four surrounding source pixels.
        u0 = np.floor(u).astype(np.intp)
        v0 = np.floor(v).astype(np.intp)
        u1 = np.minimum(u0 + 1, w - 1)
        v1 = np.minimum(v0 + 1, h - 1)
        fu = (u - u0).reshape(weight_shape)
        fv = (v - v0).reshape(weight_shape)
        upper = src[v0, u0] * (1.0 - fu) + src[v0, u1] * fu
        lower = src[v1, u0] * (1.0 - fu) + src[v1, u1] * fu
        face = upper * (1.0 - fv) + lower * fv

        face[~covered] = 0
        if round_result:
            face = np.rint(face)
        start = index * cube_size
        cube_faces[:, start:start + cube_size] = face.astype(img.dtype)

    return cube_faces

# Usage example
image_path = 'path/to/300_degree_image.jpg'
cube_size = 512
fov = 300  # horizontal field of view of the panorama, in degrees
img = cv2.imread(image_path)
# cv2.imread does not raise on a missing/unreadable file; it returns None,
# which would otherwise surface later as an obscure AttributeError.
if img is None:
    raise FileNotFoundError("Could not read image: " + image_path)
cube_faces = equirectangular_to_cube(img, cube_size, fov)

# Save the cube faces as separate images.
# The strip holds six equally wide faces side by side, in this order.
front, right, back, left, top, bottom = np.hsplit(cube_faces, 6)

cv2.imwrite("front.jpg", front)
cv2.imwrite("right.jpg", right)
cv2.imwrite("back.jpg", back)
cv2.imwrite("left.jpg", left)
cv2.imwrite("top.jpg", top)
cv2.imwrite("bottom.jpg", bottom)


  1. 添加 FOV 参数: 该函数现在接受一个 fov 参数(以度为单位),允许指定水平视野。
  2. 调整水平坐标: 用于 x 的 np.linspace 调用现在使用 -fov / 2 和 fov / 2 作为其上下限,将水平范围从 360 度调整为输入的 fov。注意:np.cos 和 np.sin 以弧度为单位,因此以度为单位给出的 fov 在参与三角运算之前必须先转换为弧度(例如使用 np.radians(fov))。

通过这些更改,该代码现在可以正确地将 300 度等距柱状全景图转换为立方体面。请记住将 image_path 和 fov 替换为您的实际图像路径和视野。

