通过K-means提取图片主颜色

标签：提取颜色 means image hist path folder data

提示：文章写完后，目录可以自动生成，如何生成可参考右边的帮助文档

文章目录

前言
一、基本流程是什么？
二、使用步骤
总结

前言

最近想提取图片中的主要颜色，于是查阅了相关代码，借鉴了其他大佬的经验，整理出了下面这套代码，在这里分享出来。

提示：以下是本篇文章正文内容，下面案例可供参考

一、基本流程是什么？

代码的基本功能是识别图片中的主要颜色：按照设定好的聚类数对像素颜色进行K-means聚类，将识别结果写入Excel，并在输出文件夹中生成对应的颜色占比图，便于可视化。图片样例如下：
在这里插入图片描述

二、使用步骤

1.引入库

代码如下：

from pydantic import ValidationError
from sklearn.cluster import KMeans
import cv2
import matplotlib.pyplot as plt
import numpy as np
from time import sleep
import os
import pandas as pd
from operator import itemgetter

2.从图像中提取主要颜色，并生成十六进制颜色列表

代码如下：

def ExtractColors(img, clust=3):
    """Extract the dominant colors of an image file with K-means clustering.

    The fitted estimator is stored on ``ExtractColors.clt`` so that
    ``centroid_histogram`` can read the cluster labels afterwards.

    Args:
        img: Path to a ``.png``, ``.jpg`` or ``.jpeg`` file.
        clust: Number of clusters (colors) to extract.

    Returns:
        A list of hex color strings (``'#rrggbb'``), one per cluster
        center, in the order of ``cluster_centers_``.

    Raises:
        ValueError: If the file extension is not supported, or the file
            cannot be decoded as an image.
    """
    if not img.lower().endswith(('.png', '.jpg', '.jpeg')):
        # ValueError is used because pydantic's ValidationError is not meant
        # to be constructed from a plain message string.
        raise ValueError("Only .jpg and .png images can be accepted")

    image = cv2.imread(img)
    if image is None:
        # cv2.imread signals a missing/unreadable file by returning None
        # rather than raising, so fail here with a clear message.
        raise ValueError(f"Could not read image: {img}")

    height, width = image.shape[:2]
    if height > 1000 and width > 1000:
        # Shrink large images to 30% of their original size to keep the
        # clustering time reasonable.
        scale_percent = 30
        dsize = (int(width * scale_percent / 100),
                 int(height * scale_percent / 100))
        image = cv2.resize(image, dsize)

    # OpenCV loads pixels as BGR; convert to RGB so the hex codes are
    # in the conventional channel order.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Flatten the image into a list of (R, G, B) pixels.
    pixels = image.reshape((image.shape[0] * image.shape[1], 3))

    print("please wait,This may take a moment :)")
    # Keep the fitted model on the function object: centroid_histogram()
    # reads it from there.
    ExtractColors.clt = KMeans(n_clusters=clust)
    ExtractColors.clt.fit(pixels)

    # Cluster centers are floats; truncate to integer channel values.
    centers = ExtractColors.clt.cluster_centers_.astype(int).tolist()
    return [f"#{r:02x}{g:02x}{b:02x}" for r, g, b in centers]

3.生成颜色簇的直方图

def centroid_histogram(clt=None):
    """Return the share of samples assigned to each K-means cluster.

    Args:
        clt: Fitted clustering estimator exposing ``labels_`` and
            ``cluster_centers_``. Defaults to ``ExtractColors.clt``, the
            estimator stored by the most recent ``ExtractColors`` call.

    Returns:
        A float array with one entry per cluster center; entry ``i`` is the
        fraction of samples labelled ``i``. The entries sum to 1 whenever
        at least one sample was labelled.
    """
    if clt is None:
        clt = ExtractColors.clt

    labels = np.asarray(clt.labels_)
    # Size the histogram by the number of centers, not by the number of
    # distinct labels: a cluster that received no samples must still occupy
    # its slot, otherwise the result no longer lines up with
    # cluster_centers_ (and with the hex list built from it).
    counts = np.bincount(labels, minlength=len(clt.cluster_centers_))

    # Normalize so the histogram sums to 1.
    hist = counts.astype("float")
    total = hist.sum()
    if total > 0:
        hist /= total
    return hist

生成直方图是为了得到每种颜色所占的比例，便于后续的颜色分析和图表绘制。

4.生成颜色分布的饼图，并保存或显示

def ClusterAnalysis(hexlist, histogram, save_path=None):
    """Draw a donut chart of the color shares and save or show it.

    Args:
        hexlist: Hex color strings, one per cluster.
        histogram: Share of each color as a fraction in 0..1, aligned
            with ``hexlist``.
        save_path: If given, the figure is written to this path and closed;
            otherwise it is shown interactively.

    Raises:
        TypeError: If ``hexlist`` is None.
        ValueError: If no color reaches a 1% share.
    """
    if hexlist is None:
        raise TypeError("Hex Color List Not Found")

    min_share = 0.01    # colors below 1% of the pixels are dropped
    label_share = 0.05  # only wedges above 5% get an arrow annotation

    filtered_data = [(color, share) for color, share in zip(hexlist, histogram)
                     if share >= min_share]
    if not filtered_data:
        raise ValueError("No colors with at least 1% presence found.")

    # Split the surviving (color, share) pairs back into parallel tuples.
    filtered_hexlist, data = zip(*filtered_data)

    fig, ax = plt.subplots(figsize=(12, 6), subplot_kw=dict(aspect="equal"), facecolor='#FAFAFA')

    # autopct receives the wedge size as a percentage (0..100), hence "> 5".
    wedges, texts, autopcts = ax.pie(data, colors=filtered_hexlist,
                                     autopct=lambda pct: "{:.1f}%".format(pct) if pct > 5 else '',
                                     pctdistance=0.85,
                                     wedgeprops=dict(width=0.3, linewidth=3, edgecolor='white'),
                                     startangle=-40)

    plt.setp(autopcts, **{'color': 'black', 'weight': 'bold', 'fontsize': 6})
    bbox_props = dict(boxstyle="square,pad=0.3", fc="w", ec="k", lw=0.72)
    kw = dict(arrowprops=dict(arrowstyle="-"), bbox=bbox_props, zorder=0, va="center")

    for wedge, hex_code, share in zip(wedges, filtered_hexlist, data):
        # Shares are fractions (0..1), so compare against 0.05, not 5;
        # small wedges are covered by the legend only.
        if share <= label_share:
            continue
        # Anchor the annotation at the angular midpoint of the wedge.
        ang = (wedge.theta2 - wedge.theta1) / 2. + wedge.theta1
        y = np.sin(np.deg2rad(ang))
        x = np.cos(np.deg2rad(ang))
        # Place the label on the same side as the wedge; x == 0 is treated
        # as the right-hand side instead of failing a sign lookup.
        side = 1 if x >= 0 else -1
        horizontalalignment = "left" if side > 0 else "right"
        kw["arrowprops"].update({"connectionstyle": "angle,angleA=0,angleB={}".format(ang)})
        ax.annotate(f"{hex_code}: {share * 100:.1f}%", xy=(x, y), xytext=(1.5 * side, 1.4 * y),
                    horizontalalignment=horizontalalignment, **kw)

    # The legend lists every remaining color, including the small wedges.
    ax.legend(wedges, [f"{h}: {d*100:.1f}%" for h, d in zip(filtered_hexlist, data)],
              title="Color Legend", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1))

    if save_path is not None:
        fig.savefig(save_path, bbox_inches='tight')
        # Close this figure explicitly so batch runs do not accumulate figures.
        plt.close(fig)
    else:
        plt.show()

5.将颜色数据保存到Excel文件中

def save_color_data_to_excel(image_name, hex_list, hist, output_excel_path):
    """Write one row of color/percentage pairs for an image to an Excel file.

    Colors with a share below 1% are skipped; the rest are written as
    ``Color_1``/``Percentage_1``, ``Color_2``/``Percentage_2``, ... in
    descending order of share. A new workbook is created if
    ``output_excel_path`` does not exist; otherwise the row is appended
    below the existing rows of ``Sheet1``.

    Args:
        image_name: Value stored in the ``Image`` column.
        hex_list: Hex color strings, one per cluster.
        hist: Share of each color as a fraction in 0..1, aligned with
            ``hex_list``.
        output_excel_path: Path of the ``.xlsx`` file to create or extend.
    """
    # Sort on the numeric share and format afterwards: sorting the formatted
    # strings would compare them lexicographically, putting "9.50%" ahead
    # of "10.20%".
    ranked = sorted(
        ((hex_code, share) for hex_code, share in zip(hex_list, hist) if share >= 0.01),
        key=itemgetter(1),
        reverse=True,
    )

    # Build a single-row table: the image name followed by the ranked pairs.
    data_dict = {"Image": image_name}
    for rank, (hex_code, share) in enumerate(ranked, start=1):
        data_dict[f"Color_{rank}"] = hex_code
        data_dict[f"Percentage_{rank}"] = f"{share * 100:.2f}%"

    df = pd.DataFrame([data_dict])

    if os.path.exists(output_excel_path):
        with pd.ExcelWriter(output_excel_path, mode='a', engine='openpyxl', if_sheet_exists='overlay') as writer:
            if 'Sheet1' in writer.book.sheetnames:
                # Append below the last used row. The header row is not
                # rewritten here, so a row with more colors than the first
                # one ends up with cells under no column title.
                df.to_excel(writer, index=False, header=False, startrow=writer.sheets['Sheet1'].max_row)
            else:
                # Workbook exists but has no Sheet1 yet: write with header.
                df.to_excel(writer, index=False, header=True)
    else:
        # First image: create the workbook, including the header row.
        with pd.ExcelWriter(output_excel_path, mode='w', engine='openpyxl') as writer:
            df.to_excel(writer, index=False)

写入Excel时，各颜色会按占比从大到小依次排列。最后生成的结果如下图所示：
在这里插入图片描述
如果图片实际得到的颜色种类少于设定的簇数（例如部分颜色占比低于1%而被过滤），则实际有几种颜色就显示几种。

6.批量处理文件夹中的图像，提取颜色，生成图表，并保存数据到Excel

def analyze_images_in_folder(input_folder, output_folder, num_clusters, output_excel_path):
    """Run the color analysis on every image directly inside a folder.

    For each ``.png``/``.jpg``/``.jpeg`` file in ``input_folder`` this
    extracts ``num_clusters`` colors, writes a chart named
    ``<image stem>_color_chart.png`` into ``output_folder`` (created if
    missing) and records the colors in ``output_excel_path``.
    """
    os.makedirs(output_folder, exist_ok=True)
    image_suffixes = ('.png', '.jpg', '.jpeg')

    for entry in os.listdir(input_folder):
        # Skip anything that is not a supported image file.
        if not entry.lower().endswith(image_suffixes):
            continue

        source_path = os.path.join(input_folder, entry)
        stem, _ = os.path.splitext(entry)
        chart_path = os.path.join(output_folder, stem + '_color_chart.png')

        palette = ExtractColors(source_path, num_clusters)
        # The histogram must be taken right after extraction: it reads the
        # model fitted by the ExtractColors call above.
        shares = centroid_histogram()
        ClusterAnalysis(palette, shares, chart_path)
        save_color_data_to_excel(entry, palette, shares, output_excel_path)

if __name__ == "__main__":
    # Script entry point. Replace the 'xxxxx' placeholders with real paths
    # before running.
    run_settings = dict(
        input_folder='xxxxx',       # folder containing the images to analyze
        output_folder='xxxxx',      # folder that receives the color charts
        num_clusters=10,            # number of colors to extract per image
        output_excel_path='xxxxx',  # Excel file collecting the color data
    )
    analyze_images_in_folder(**run_settings)

7.main主程序

主程序即上一节代码末尾的 if __name__ == "__main__" 部分：设置输入文件夹、输出文件夹、颜色簇数量（我这里设定的是10种）和输出Excel文件路径，然后调用analyze_images_in_folder函数处理图像。结果如下图所示：

在这里插入图片描述

总结

以上就是K-means函数相关的颜色聚类使用，本文仅仅简单分享了相关代码，如有不足还望批评指正。

标签：提取,颜色,means,image,hist,path,folder,data
From： https://blog.csdn.net/diyuxiaoguaishou/article/details/139198144