CLAM(Clustering-constrained Attention Multiple Instance Learning)是一种面向数据高效的弱监督计算病理学方法,特别适用于仅使用切片级标签对全切片图像(WSI)进行分类,无需提取感兴趣区域(ROI),也无需进行图块(patch)级别的标注。
请先允许我交代一下背景,以免有班门弄斧之嫌。这次是帮师兄实现"指定区域分割"(使用 mask_file 进行分割)这一功能。CLAM 的代码在实现 segment 方法时已经提供了 initSegmentation,本文就是在它的基础上进行改动。主要是对 create_patches_fp.py 和 WholeSlideImage.py 进行重构,此外还有 mask_file 的处理:师兄标注完成后得到 .xml 格式的文件,需要从中提取组织轮廓和孔洞的坐标,并转换为 .pkl 文件。
--save_dir RESULTS_DIRECTORY #输出就在该目录下的三个文件夹中(masks、patches、stitches)
分割示意 (.h5) 分割结果
python path\create_patches_fp.py --source path\DATA_DIRECTORY_test --save_dir path\RESULTS_DIRECTORY --patch --seg --stitch --mask_dir path\MASK_DIRECTORY
此时终端需要指定 --mask_dir path\MASK_DIRECTORY
import ctypes
import os
import time
import argparse
import pandas as pd
import numpy as np
from tqdm import tqdm
from wsi_core.WholeSlideImage import WholeSlideImage
from wsi_core.wsi_utils import StitchCoords
from wsi_core.batch_process_utils import initialize_df
def stitching(file_path, wsi_object, downscale=64):
    """Run ``StitchCoords`` on a patch file and time the call.

    Args:
        file_path: Path handed to ``StitchCoords`` as its first argument.
        wsi_object: Slide object handed to ``StitchCoords``.
        downscale: Downscale factor forwarded to ``StitchCoords``.

    Returns:
        A ``(heatmap, seconds)`` tuple: the value returned by
        ``StitchCoords`` and the wall-clock time the call took.
    """
    began = time.time()
    stitched = StitchCoords(
        file_path,
        wsi_object,
        downscale=downscale,
        bg_color=(0, 0, 0),
        alpha=-1,
        draw_grid=False,
    )
    return stitched, time.time() - began
def segment(WSI_object, seg_params=None, filter_params=None, mask_file=None):
    """Populate the tissue contours of ``WSI_object`` and time the operation.

    When ``mask_file`` points to an existing file the pre-computed contours are
    loaded through ``WSI_object.initSegmentation``. Otherwise the slide is
    segmented automatically through ``WSI_object.segmentTissue``.

    Args:
        WSI_object: Object exposing ``initSegmentation`` and ``segmentTissue``.
        seg_params: Keyword arguments for ``segmentTissue`` (``None`` means none).
        filter_params: Forwarded as ``filter_params=`` to ``segmentTissue``;
            omitted from the call when ``None``.
        mask_file: Optional path to a saved segmentation file.

    Returns:
        ``(WSI_object, seconds)`` where ``seconds`` is the elapsed wall time.
    """
    start_time = time.time()
    if mask_file is not None and os.path.isfile(mask_file):
        WSI_object.initSegmentation(mask_file)
    else:
        if mask_file is not None:
            # A mask was requested but is missing: say so instead of silently
            # leaving the slide without any contours.
            print('Mask file {} not found, falling back to automatic segmentation'.format(mask_file))
        seg_kwargs = dict(seg_params) if seg_params else {}
        if filter_params is not None:
            seg_kwargs['filter_params'] = filter_params
        WSI_object.segmentTissue(**seg_kwargs)
    seg_time_elapsed = time.time() - start_time
    return WSI_object, seg_time_elapsed
def patching(WSI_object, **kwargs):
    """Call ``WSI_object.process_contours(**kwargs)`` and time it.

    Returns:
        ``(file_path, seconds)``: whatever ``process_contours`` returned and
        the elapsed wall-clock time of the call.
    """
    began = time.time()
    result_path = WSI_object.process_contours(**kwargs)
    return result_path, time.time() - began
def _resolve_auto_level(WSI_object, level):
    """Return ``level`` if it is non-negative, otherwise pick one automatically.

    A negative level means "choose for me": level 0 for single-level slides,
    else the level OpenSlide recommends for a 64x downsample.
    """
    if level >= 0:
        return level
    if len(WSI_object.level_dim) == 1:
        return 0
    wsi = WSI_object.getOpenSlide()
    return wsi.get_best_level_for_downsample(64)


def _parse_contour_ids(raw_ids):
    """Turn a comma-separated id string (or ``'none'``) into an id sequence."""
    text = str(raw_ids)
    # 'nan' is what an empty CSV cell becomes after going through pandas.
    if text in ('none', 'nan') or len(text) == 0:
        return []
    return np.array(text.split(',')).astype(int)


def seg_and_patch(source, save_dir, patch_save_dir, mask_save_dir, stitch_save_dir,
                  mask_dir, patch_size=256, step_size=256,
                  seg_params={'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False,
                              'keep_ids': 'none', 'exclude_ids': 'none'},
                  filter_params={'a_t': 100, 'a_h': 16, 'max_n_holes': 8},
                  vis_params={'vis_level': -1, 'line_thickness': 500},
                  patch_params={'use_padding': True, 'contour_fn': 'four_pt'}, patch_level=0,
                  use_default_params=False, seg=False, save_mask=True,
                  stitch=False, patch=False, auto_skip=True, process_list=None):
    """Segment, patch and stitch every slide found in ``source``.

    Args:
        source: Directory containing the slide files.
        save_dir: Directory receiving ``process_list_autogen.csv``.
        patch_save_dir: Directory for the ``<slide_id>.h5`` patch files.
        mask_save_dir: Directory for the ``<slide_id>.jpg`` mask previews.
        stitch_save_dir: Directory for the ``<slide_id>.jpg`` stitched images.
        mask_dir: Directory searched for ``<slide_id>.pkl`` mask files; may be
            ``None``, in which case no mask file is passed to ``segment``.
        patch_size, step_size, patch_level: Patching geometry.
        seg_params, filter_params, vis_params, patch_params: Default parameter
            dictionaries. They are only read here, never mutated.
        use_default_params: Use the dictionaries above for every slide instead
            of the per-slide values stored in the process list.
        seg, save_mask, stitch, patch: Enable the corresponding stage.
        auto_skip: Skip slides whose ``.h5`` file already exists.
        process_list: Optional CSV path with per-slide parameters.

    Returns:
        ``(seg_times, patch_times)``: average seconds per slide. Stages that
        did not run contribute ``-1`` per slide, as before.
    """
    slides = sorted(os.listdir(source))
    slides = [slide for slide in slides if os.path.isfile(os.path.join(source, slide))]
    if process_list is None:
        df = initialize_df(slides, seg_params, filter_params, vis_params, patch_params)
    else:
        df = pd.read_csv(process_list)
        df = initialize_df(df, seg_params, filter_params, vis_params, patch_params)

    to_process = df['process'] == 1
    process_stack = df[to_process]
    total = len(process_stack)

    legacy_support = 'a' in df.keys()
    if legacy_support:
        print('Detected legacy segmentation csv file, legacy support enabled')
        df = df.assign(**{'a_t': np.full((len(df)), int(filter_params['a_t']), dtype=np.uint32),
                          'a_h': np.full((len(df)), int(filter_params['a_h']), dtype=np.uint32),
                          'max_n_holes': np.full((len(df)), int(filter_params['max_n_holes']), dtype=np.uint32),
                          'line_thickness': np.full((len(df)), int(vis_params['line_thickness']), dtype=np.uint32),
                          'contour_fn': np.full((len(df)), patch_params['contour_fn'])})

    seg_times = 0.0
    patch_times = 0.0
    stitch_times = 0.0

    for i in tqdm(range(total)):
        # Persist progress before each slide so an interrupted run can resume.
        df.to_csv(os.path.join(save_dir, 'process_list_autogen.csv'), index=False)
        idx = process_stack.index[i]
        slide = process_stack.loc[idx, 'slide_id']
        print("\n\nProgress: {:.2f}, {}/{}".format(i / total, i, total))
        print('Processing {}'.format(slide))

        df.loc[idx, 'process'] = 0
        slide_id, _ = os.path.splitext(slide)

        if auto_skip and os.path.isfile(os.path.join(patch_save_dir, slide_id + '.h5')):
            print('{} already exists in destination location, skipped'.format(slide_id))
            df.loc[idx, 'status'] = 'already_exist'
            continue

        full_path = os.path.join(source, slide)
        WSI_object = WholeSlideImage(full_path)

        if use_default_params:
            current_vis_params = vis_params.copy()
            current_filter_params = filter_params.copy()
            current_seg_params = seg_params.copy()
            current_patch_params = patch_params.copy()
        else:
            current_vis_params = {}
            current_filter_params = {}
            current_seg_params = {}
            current_patch_params = {}

            for key in vis_params.keys():
                if legacy_support and key == 'vis_level':
                    df.loc[idx, key] = -1
                current_vis_params.update({key: df.loc[idx, key]})

            for key in filter_params.keys():
                if legacy_support and key == 'a_t':
                    # Legacy files store the area threshold in column 'a',
                    # relative to the segmentation level; rescale it to the
                    # 512 x 512 reference patch used by the current format.
                    old_area = df.loc[idx, 'a']
                    seg_level = df.loc[idx, 'seg_level']
                    scale = WSI_object.level_downsamples[seg_level]
                    adjusted_area = int(old_area * (scale[0] * scale[1]) / (512 * 512))
                    current_filter_params.update({key: adjusted_area})
                    df.loc[idx, key] = adjusted_area
                current_filter_params.update({key: df.loc[idx, key]})

            for key in seg_params.keys():
                if legacy_support and key == 'seg_level':
                    df.loc[idx, key] = -1
                current_seg_params.update({key: df.loc[idx, key]})

            for key in patch_params.keys():
                current_patch_params.update({key: df.loc[idx, key]})

        current_vis_params['vis_level'] = _resolve_auto_level(WSI_object, current_vis_params['vis_level'])
        current_seg_params['seg_level'] = _resolve_auto_level(WSI_object, current_seg_params['seg_level'])

        current_seg_params['keep_ids'] = _parse_contour_ids(current_seg_params['keep_ids'])
        current_seg_params['exclude_ids'] = _parse_contour_ids(current_seg_params['exclude_ids'])

        w, h = WSI_object.level_dim[current_seg_params['seg_level']]
        if w * h > 1e8:
            print('level_dim {} x {} is likely too large for successful segmentation, aborting'.format(w, h))
            df.loc[idx, 'status'] = 'failed_seg'
            continue

        df.loc[idx, 'vis_level'] = current_vis_params['vis_level']
        df.loc[idx, 'seg_level'] = current_seg_params['seg_level']

        seg_time_elapsed = -1
        # Without a mask directory there is no mask file to look for.
        mask_file = os.path.join(mask_dir, f"{slide_id}.pkl") if mask_dir else None
        if seg:
            WSI_object, seg_time_elapsed = segment(WSI_object, current_seg_params, current_filter_params, mask_file)

        if save_mask:
            mask_img = WSI_object.visWSI(**current_vis_params)
            mask_path = os.path.join(mask_save_dir, slide_id + '.jpg')
            mask_img.save(mask_path)

        patch_time_elapsed = -1
        if patch:
            current_patch_params.update({'patch_level': patch_level, 'patch_size': patch_size, 'step_size': step_size,
                                         'save_path': patch_save_dir})
            file_path, patch_time_elapsed = patching(WSI_object=WSI_object, **current_patch_params)

        stitch_time_elapsed = -1
        if stitch:
            file_path = os.path.join(patch_save_dir, slide_id + '.h5')
            if os.path.isfile(file_path):
                heatmap, stitch_time_elapsed = stitching(file_path, WSI_object, downscale=64)
                stitch_path = os.path.join(stitch_save_dir, slide_id + '.jpg')
                heatmap.save(stitch_path)

        print("Segmentation took {} seconds".format(seg_time_elapsed))
        print("Patching took {} seconds".format(patch_time_elapsed))
        print("Stitching took {} seconds".format(stitch_time_elapsed))
        df.loc[idx, 'status'] = 'processed'

        seg_times += seg_time_elapsed
        patch_times += patch_time_elapsed
        stitch_times += stitch_time_elapsed

    # Nothing to average when no slide was scheduled; avoid dividing by zero.
    if total > 0:
        seg_times /= total
        patch_times /= total
        stitch_times /= total

    df.to_csv(os.path.join(save_dir, 'process_list_autogen.csv'), index=False)
    print("Average segmentation time in seconds per slide: {}".format(seg_times))
    print("Average patching time in seconds per slide: {}".format(patch_times))
    print("Average stitching time in seconds per slide: {}".format(stitch_times))
    return seg_times, patch_times
# Command-line interface of the segmentation / patching script.
parser = argparse.ArgumentParser(description='seg and patch')

# Input location and patch geometry.
parser.add_argument(
    '--source',
    type=str,
    help='path to folder containing raw wsi image files',
)
parser.add_argument('--step_size', help='step_size', default=256, type=int)
parser.add_argument('--patch_size', help='patch_size', default=256, type=int)

# Pipeline stages, all off unless requested.
parser.add_argument('--patch', action='store_true', default=False)
parser.add_argument('--seg', action='store_true', default=False)
parser.add_argument('--stitch', action='store_true', default=False)

# Stored under ``no_auto_skip``: True by default, False once the flag is given.
parser.add_argument('--no_auto_skip', action='store_false', default=True)

# Output location and optional parameter sources.
parser.add_argument(
    '--save_dir',
    type=str,
    help='directory to save processed data',
)
parser.add_argument(
    '--preset',
    type=str,
    default=None,
    help='predefined profile of default segmentation and filter parameters (.csv)',
)
parser.add_argument(
    '--patch_level',
    type=int,
    default=0,
    help='downsample level at which to patch',
)
parser.add_argument(
    '--process_list',
    type=str,
    default=None,
    help='name of list of images to process with parameters (.csv)',
)
parser.add_argument(
    '--mask_dir',
    type=str,
    help='directory containing mask files',
)
if __name__ == '__main__':
    # Script entry point: resolve the output layout, load the optional preset
    # and hand everything to seg_and_patch().
    args = parser.parse_args()
    if args.source is None or args.save_dir is None:
        # Both paths are joined below; fail with a usage message rather than
        # a TypeError from os.path.join(None, ...).
        parser.error('--source and --save_dir are required')

    patch_save_dir = os.path.join(args.save_dir, 'patches')
    mask_save_dir = os.path.join(args.save_dir, 'masks')
    stitch_save_dir = os.path.join(args.save_dir, 'stitches')

    # The process list is looked up relative to the save directory.
    if args.process_list:
        process_list = os.path.join(args.save_dir, args.process_list)
    else:
        process_list = None

    print('source: ', args.source)
    print('patch_save_dir: ', patch_save_dir)
    print('mask_save_dir: ', mask_save_dir)
    print('stitch_save_dir: ', stitch_save_dir)

    directories = {'source': args.source,
                   'save_dir': args.save_dir,
                   'patch_save_dir': patch_save_dir,
                   'mask_save_dir': mask_save_dir,
                   'stitch_save_dir': stitch_save_dir,
                   'mask_dir': args.mask_dir}

    for key, val in directories.items():
        print("{} : {}".format(key, val))
        # Input directories are never created, only the output ones.
        if key not in ['source', 'mask_dir']:
            os.makedirs(val, exist_ok=True)

    seg_params = {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False,
                  'keep_ids': 'none', 'exclude_ids': 'none'}
    filter_params = {'a_t': 100, 'a_h': 16, 'max_n_holes': 8}
    vis_params = {'vis_level': -1, 'line_thickness': 250}
    patch_params = {'use_padding': True, 'contour_fn': 'four_pt'}

    if args.preset:
        # The first row of the preset CSV overrides every default above.
        preset_df = pd.read_csv(os.path.join('presets', args.preset))
        for key in seg_params.keys():
            seg_params[key] = preset_df.loc[0, key]
        for key in filter_params.keys():
            filter_params[key] = preset_df.loc[0, key]
        for key in vis_params.keys():
            vis_params[key] = preset_df.loc[0, key]
        for key in patch_params.keys():
            patch_params[key] = preset_df.loc[0, key]

    parameters = {'seg_params': seg_params,
                  'filter_params': filter_params,
                  'patch_params': patch_params,
                  'vis_params': vis_params}

    seg_times, patch_times = seg_and_patch(**directories, **parameters,
                                           patch_size=args.patch_size, step_size=args.step_size,
                                           seg=args.seg, use_default_params=False, save_mask=True,
                                           stitch=args.stitch, patch_level=args.patch_level, patch=args.patch,
                                           process_list=process_list, auto_skip=args.no_auto_skip)
import math
import os
import time
import xml.etree.ElementTree as ET
from pprint import pprint
from xml.dom import minidom
import multiprocessing as mp
import cv2
import matplotlib.pyplot as plt
import numpy as np
import openslide
from PIL import Image
import pdb
import h5py
import math
from wsi_core.wsi_utils import savePatchIter_bag_hdf5, initialize_hdf5_bag, coord_generator, save_hdf5, sample_indices, screen_coords, isBlackPatch, isWhitePatch, to_percentiles
import itertools
from wsi_core.util_classes import isInContourV1, isInContourV2, isInContourV3_Easy, isInContourV3_Hard, Contour_Checking_fn
from utils.file_utils import load_pkl, save_pkl
from multiprocessing import Pool
# Override PIL's module-wide MAX_IMAGE_PIXELS setting.
# NOTE(review): presumably raised so large slide renderings are not rejected
# by Pillow's decompression-bomb guard — confirm.
Image.MAX_IMAGE_PIXELS = 933120000
class WholeSlideImage(object):
printed_processes = set() # Class-wide set recording the process IDs that have already printed
def __init__(self, path):
    """Open a slide and initialise the (still empty) segmentation state.

    Args:
        path (str): fullpath to WSI file
    """
    # Slide name = file name without directory and extension. It is used for
    # log messages and for naming the generated .h5 file.
    self.name = os.path.splitext(os.path.basename(path))[0]
    self.wsi = openslide.open_slide(path)
    self.level_downsamples = self._assertLevelDownsamples()
    self.level_dim = self.wsi.level_dimensions

    # Filled in by segmentTissue / initSegmentation / initXML / initTxt.
    self.holes_tissue = None
    self.contours_tissue = None
    self.contours_tumor = None
    self.hdf5_file = None
def getOpenSlide(self):
return self.wsi
def initXML(self, xml_path):
    """Load annotation contours from an XML file into ``self.contours_tumor``.

    Every ``Annotation`` element contributes one contour built from the ``X``
    and ``Y`` attributes of its ``Coordinate`` children. The contours are
    stored sorted by ``cv2.contourArea``, largest first.
    """
    def _to_contour(coord_nodes):
        # Coordinates are parsed as floats and truncated to ints.
        points = []
        for node in coord_nodes:
            px = int(float(node.attributes['X'].value))
            py = int(float(node.attributes['Y'].value))
            points.append([[px, py]])
        return np.array(points, dtype='int32')

    document = minidom.parse(xml_path)
    tumor_contours = []
    for annotation in document.getElementsByTagName('Annotation'):
        tumor_contours.append(_to_contour(annotation.getElementsByTagName('Coordinate')))
    self.contours_tumor = tumor_contours
    self.contours_tumor = sorted(self.contours_tumor, key=cv2.contourArea, reverse=True)
def initTxt(self, annot_path):
    """Load annotation contours from a text file into ``self.contours_tumor``.

    The file must contain a Python literal: a sequence of dicts, each with a
    ``'type'`` and a ``'coordinates'`` entry. For ``'Polygon'`` groups every
    coordinate list is one contour; for any other type each group of segments
    is concatenated into a single contour. Contours are stored sorted by
    ``cv2.contourArea``, largest first.
    """
    def _create_contours_from_dict(annot):
        all_cnts = []
        for annot_group in annot:
            contour_group = annot_group['coordinates']
            if annot_group['type'] == 'Polygon':
                for contour in contour_group:
                    contour = np.array(contour).astype(np.int32).reshape(-1, 1, 2)
                    all_cnts.append(contour)
            else:
                for sgmt_group in contour_group:
                    contour = []
                    for sgmt in sgmt_group:
                        contour.extend(sgmt)
                    contour = np.array(contour).astype(np.int32).reshape(-1, 1, 2)
                    all_cnts.append(contour)
        return all_cnts

    with open(annot_path, "r") as f:
        annot = f.read()
    # SECURITY NOTE(review): eval() executes arbitrary code found in the
    # annotation file. Only use this with trusted files; consider
    # ast.literal_eval if the files are plain literals.
    annot = eval(annot)
    self.contours_tumor = _create_contours_from_dict(annot)
    self.contours_tumor = sorted(self.contours_tumor, key=cv2.contourArea, reverse=True)
def initSegmentation(self, mask_file):
    """Load tissue contours and holes from a saved segmentation file.

    The file is read with ``load_pkl`` and must yield a mapping with a
    ``'tissue'`` entry (sequence of contours) and, optionally, a ``'holes'``
    entry (one sequence of hole contours per group). Every contour is
    converted to an ``int32`` array of shape ``(-1, 1, 2)``.

    NOTE(review): ``load_pkl`` presumably unpickles the file; only load mask
    files from trusted sources.
    """
    asset_dict = load_pkl(mask_file)

    def _convert_to_numpy(contours):
        return [np.array(contour).astype(np.int32).reshape(-1, 1, 2) for contour in contours]

    # A mask without any hole annotation is valid: treat it as "no holes".
    self.holes_tissue = [_convert_to_numpy(holes) for holes in asset_dict.get('holes', [])]
    self.contours_tissue = _convert_to_numpy(asset_dict['tissue'])
    print(f"Loaded {len(self.holes_tissue)} holes and {len(self.contours_tissue)} tissue contours from {mask_file}")
def saveSegmentation(self, mask_file):
    """Write the current holes and tissue contours to ``mask_file``.

    The data is handed to ``save_pkl`` as a dict with the keys ``'holes'``
    and ``'tissue'``.
    """
    save_pkl(mask_file, {'holes': self.holes_tissue, 'tissue': self.contours_tissue})
def segmentTissue(self, seg_level=0, sthresh=20, sthresh_up=255, mthresh=7, close=0, use_otsu=False,
                  filter_params={'a_t':100}, ref_patch_size=512, exclude_ids=[], keep_ids=[]):
    """
    Segment the tissue via HSV -> Median thresholding -> Binary threshold

    Args:
        seg_level: Pyramid level the segmentation is computed on.
        sthresh: Saturation threshold (ignored when ``use_otsu`` is set).
        sthresh_up: Value assigned to pixels above the threshold.
        mthresh: Kernel size of the median blur.
        close: Kernel size of the morphological closing; 0 disables it.
        use_otsu: Use Otsu's method instead of the fixed threshold.
        filter_params: Area filters. ``a_t`` and ``a_h`` are the minimum
            tissue / hole areas in units of the reference patch area and
            ``max_n_holes`` caps the holes kept per contour. Missing keys
            fall back to ``a_t=100, a_h=16, max_n_holes=8``.
        ref_patch_size: Side length of the reference patch at level 0.
        exclude_ids: Contour indices to drop.
        keep_ids: Contour indices to keep; empty means all.

    The results are stored in ``self.contours_tissue`` and
    ``self.holes_tissue``. The default arguments are never mutated.
    """
    def _filter_contours(contours, hierarchy, filter_params):
        """
        Filter contours by: area.
        """
        filtered = []

        # find indices of foreground contours (parent == -1)
        hierarchy_1 = np.flatnonzero(hierarchy[:, 1] == -1)
        all_holes = []

        # loop through foreground contour indices
        for cont_idx in hierarchy_1:
            # actual contour
            cont = contours[cont_idx]
            # indices of holes contained in this contour (children of parent contour)
            holes = np.flatnonzero(hierarchy[:, 1] == cont_idx)
            # take contour area (includes holes)
            a = cv2.contourArea(cont)
            # calculate the contour area of each hole
            hole_areas = [cv2.contourArea(contours[hole_idx]) for hole_idx in holes]
            # actual area of foreground contour region
            a = a - np.array(hole_areas).sum()
            if a == 0:
                continue
            if filter_params['a_t'] < a:
                filtered.append(cont_idx)
                all_holes.append(holes)

        foreground_contours = [contours[cont_idx] for cont_idx in filtered]

        hole_contours = []
        for hole_ids in all_holes:
            candidate_holes = [contours[idx] for idx in hole_ids]
            candidate_holes = sorted(candidate_holes, key=cv2.contourArea, reverse=True)
            # take max_n_holes largest holes by area
            candidate_holes = candidate_holes[:filter_params['max_n_holes']]
            # keep only the holes that are large enough
            filtered_holes = [hole for hole in candidate_holes
                              if cv2.contourArea(hole) > filter_params['a_h']]
            hole_contours.append(filtered_holes)

        return foreground_contours, hole_contours

    img = np.array(self.wsi.read_region((0, 0), seg_level, self.level_dim[seg_level]))
    img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)  # Convert to HSV space
    img_med = cv2.medianBlur(img_hsv[:, :, 1], mthresh)  # Apply median blurring

    # Thresholding
    if use_otsu:
        _, img_otsu = cv2.threshold(img_med, 0, sthresh_up, cv2.THRESH_OTSU + cv2.THRESH_BINARY)
    else:
        _, img_otsu = cv2.threshold(img_med, sthresh, sthresh_up, cv2.THRESH_BINARY)

    # Morphological closing
    if close > 0:
        kernel = np.ones((close, close), np.uint8)
        img_otsu = cv2.morphologyEx(img_otsu, cv2.MORPH_CLOSE, kernel)

    scale = self.level_downsamples[seg_level]
    scaled_ref_patch_area = int(ref_patch_size**2 / (scale[0] * scale[1]))

    # Work on a private, fully populated copy so that neither the caller's
    # dict nor the shared default argument is modified.
    effective_filter = {'a_t': 100, 'a_h': 16, 'max_n_holes': 8}
    effective_filter.update(filter_params or {})
    effective_filter['a_t'] = effective_filter['a_t'] * scaled_ref_patch_area
    effective_filter['a_h'] = effective_filter['a_h'] * scaled_ref_patch_area

    # Find and filter contours
    contours, hierarchy = cv2.findContours(img_otsu, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
    if hierarchy is None:
        # Nothing above the threshold: no tissue at all.
        foreground_contours, hole_contours = [], []
    else:
        hierarchy = np.squeeze(hierarchy, axis=(0,))[:, 2:]
        # Necessary for filtering out artifacts
        foreground_contours, hole_contours = _filter_contours(contours, hierarchy, effective_filter)

    self.contours_tissue = self.scaleContourDim(foreground_contours, scale)
    self.holes_tissue = self.scaleHolesDim(hole_contours, scale)

    if len(keep_ids) > 0:
        contour_ids = set(keep_ids) - set(exclude_ids)
    else:
        contour_ids = set(np.arange(len(self.contours_tissue))) - set(exclude_ids)

    # Sorted so the resulting contour order does not depend on set iteration.
    contour_ids = sorted(contour_ids)
    self.contours_tissue = [self.contours_tissue[i] for i in contour_ids]
    self.holes_tissue = [self.holes_tissue[i] for i in contour_ids]
def visWSI(self, vis_level=0, color=(0, 255, 0), hole_color=(0, 0, 255), annot_color=(255, 0, 0),
           line_thickness=250, max_size=None, top_left=None, bot_right=None, custom_downsample=1,
           view_slide_only=False, number_contours=False, seg_display=True, annot_display=True):
    """Render the slide (or a region of it) with the contours drawn on top.

    Args:
        vis_level: Pyramid level to read.
        color, hole_color, annot_color: Colours for tissue contours, holes
            and tumor annotations.
        line_thickness: Line width at level 0; scaled to ``vis_level``.
        max_size: If set, the longer image side is limited to this size.
        top_left, bot_right: Optional level-0 corners of the region to show;
            the whole level is rendered unless both are given.
        custom_downsample: Additional integer downscaling of the result.
        view_slide_only: Skip all overlays.
        number_contours: Draw each tissue contour with its index.
        seg_display: Draw tissue contours and holes.
        annot_display: Draw tumor annotation contours.

    Returns:
        A ``PIL.Image`` with the rendering.
    """
    downsample = self.level_downsamples[vis_level]
    scale = [1 / downsample[0], 1 / downsample[1]]

    if top_left is not None and bot_right is not None:
        top_left = tuple(top_left)
        bot_right = tuple(bot_right)
        w, h = tuple((np.array(bot_right) * scale).astype(int) - (np.array(top_left) * scale).astype(int))
        region_size = (w, h)
    else:
        top_left = (0, 0)
        region_size = self.level_dim[vis_level]

    img = np.array(self.wsi.read_region(top_left, vis_level, region_size).convert("RGB"))

    if not view_slide_only:
        offset = tuple(-(np.array(top_left) * scale).astype(int))
        line_thickness = int(line_thickness * np.sqrt(scale[0] * scale[1]))

        if self.contours_tissue is not None and seg_display:
            if not number_contours:
                cv2.drawContours(img, self.scaleContourDim(self.contours_tissue, scale),
                                 -1, color, line_thickness, lineType=cv2.LINE_8, offset=offset)
            else:
                for idx, cont in enumerate(self.contours_tissue):
                    # Scale the contour as one unit; scaleContourDim expects a
                    # sequence of contours, hence the wrapping list.
                    contour = np.array(self.scaleContourDim([cont], scale),
                                       dtype=np.int32).reshape(-1, 1, 2)
                    M = cv2.moments(contour)
                    cX = int(M["m10"] / (M["m00"] + 1e-9))
                    cY = int(M["m01"] / (M["m00"] + 1e-9))
                    cv2.drawContours(img, [contour], -1, color, line_thickness, lineType=cv2.LINE_8, offset=offset)
                    cv2.putText(img, "{}".format(idx), (cX, cY),
                                cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 10)

        # Draw the hole outlines.
        if self.holes_tissue is not None and seg_display:
            for hole in self.holes_tissue:
                # An entry is either one polygon or a group (list) of polygons.
                if isinstance(hole, (list, tuple)) and len(hole) > 0 and isinstance(hole[0], np.ndarray):
                    polygons = list(hole)
                else:
                    polygons = [hole]
                for polygon in polygons:
                    hole_points = np.array(polygon)
                    if hole_points.size == 0:
                        continue
                    scaled_hole = np.array(self.scaleContourDim([hole_points], scale),
                                           dtype=np.int32).reshape((-1, 1, 2))
                    if scaled_hole.size == 0:
                        continue
                    # Rasterise the hole on a blank canvas and re-detect its
                    # outer boundary before drawing it onto the image.
                    blank_image = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)
                    cv2.drawContours(blank_image, [scaled_hole], -1, 255, thickness=cv2.FILLED)
                    contours, _ = cv2.findContours(blank_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    cv2.drawContours(img, contours, -1, hole_color, line_thickness,
                                     lineType=cv2.LINE_8, offset=offset)

        if self.contours_tumor is not None and annot_display:
            cv2.drawContours(img, self.scaleContourDim(self.contours_tumor, scale),
                             -1, annot_color, line_thickness, lineType=cv2.LINE_8, offset=offset)

    img = Image.fromarray(img)
    w, h = img.size
    if custom_downsample > 1:
        img = img.resize((int(w / custom_downsample), int(h / custom_downsample)))

    if max_size is not None and (w > max_size or h > max_size):
        resizeFactor = max_size / w if w > h else max_size / h
        img = img.resize((int(w * resizeFactor), int(h * resizeFactor)))

    return img
def createPatches_bag_hdf5(self, save_path, patch_level=0, patch_size=256, step_size=256, save_coord=True,
                           **kwargs):
    """Extract patches contour by contour and append them to an HDF5 bag.

    Args:
        save_path: Output location forwarded to the patch generator.
        patch_level, patch_size, step_size: Patching geometry.
        save_coord: Forwarded to ``initialize_hdf5_bag``.
        **kwargs: Extra options forwarded to ``_getPatchGenerator``.

    Returns:
        ``self.hdf5_file``: the value returned by ``initialize_hdf5_bag`` for
        the first patch, or its previous value when no patch was produced.

    NOTE(review): the trailing ``**kwargs`` of the signature and the
    ``savePatchIter_bag_hdf5`` call were missing from the reviewed text and
    are reconstructed; confirm that the save helper accepts the extra
    ``'contour_holes'`` key.
    """
    contours = self.contours_tissue if self.contours_tissue is not None else []
    contour_holes = self.holes_tissue if self.holes_tissue is not None else []
    print(f"Contour Holes: {contour_holes}")
    print("Creating patches for: ", self.name, "...")

    for idx, cont in enumerate(contours):
        holes = contour_holes[idx] if idx < len(contour_holes) else []
        print(f"holes for contour {idx}: {holes}")

        # Bundle the contour with the holes that belong to it.
        contour_with_holes = {
            'contour': cont,
            'holes': holes
        }

        # Build the patch generator for this contour.
        patch_gen = self._getPatchGenerator(contour_with_holes, idx, patch_level, save_path, patch_size, step_size,
                                            **kwargs)

        if self.hdf5_file is None:
            try:
                first_patch = next(patch_gen)
            except StopIteration:
                # Empty contour: nothing to initialise the file with.
                continue
            file_path = initialize_hdf5_bag(first_patch, save_coord=save_coord)
            self.hdf5_file = file_path

        for patch in patch_gen:
            patch['contour_holes'] = holes  # attach the hole information
            print(f"Adding contour_holes to patch: {holes}")
            savePatchIter_bag_hdf5(patch)

    return self.hdf5_file
def _getPatchGenerator(self, cont, cont_idx, patch_level, save_path, patch_size=256, step_size=256,
                       white_black=True, white_thresh=15, black_thresh=50, contour_fn='four_pt_hard',
                       custom_downsample=1, use_padding=True):
    """Yield one info dict per patch lying inside a contour and outside its holes.

    Args:
        cont: Either a contour array or a dict with the keys ``'contour'``
            and ``'holes'``.
        cont_idx: Index of the contour; stored in every yielded dict.
        patch_level, patch_size, step_size: Patching geometry.
        save_path: Stored in every yielded dict.
        white_black: Skip patches rejected by ``isBlackPatch``/``isWhitePatch``.
        white_thresh, black_thresh: Thresholds for those two checks.
        contour_fn: ``'four_pt'``, ``'four_pt_hard'``, ``'center'``,
            ``'basic'`` or a ``Contour_Checking_fn`` instance.
        custom_downsample: 1, or 2 to read double-size patches and halve them.
        use_padding: Allow patches that extend past the slide border.

    NOTE(review): ``custom_downsample`` and ``use_padding`` are used by the
    body but were missing from the truncated signature; their position and
    defaults, and the ``satThresh`` keyword below, are reconstructed.
    """
    if isinstance(cont, dict):
        holes = cont.get('holes', [])
        cont = cont.get('contour')
    elif self.holes_tissue is not None and cont_idx < len(self.holes_tissue):
        holes = self.holes_tissue[cont_idx]
    else:
        holes = []
    print(f"holes in _getPatchGenerator for contour {cont_idx}: {holes}")

    start_x, start_y, w, h = cv2.boundingRect(cont) if cont is not None else (
        0, 0, self.level_dim[patch_level][0], self.level_dim[patch_level][1])
    print("BoundingBox:", start_x, start_y, w, h)
    if cont is not None:
        print("Contour Area:", cv2.contourArea(cont))
    print(f"Number of holes: {len(holes)}")

    if custom_downsample > 1:
        assert custom_downsample == 2
        target_patch_size = patch_size
        patch_size = target_patch_size * 2
        step_size = step_size * 2
        print("Custom Downsample: {}, Patching at {} x {}, But Final Patch Size is {} x {}".format(
            custom_downsample, patch_size, patch_size, target_patch_size, target_patch_size))

    patch_downsample = (int(self.level_downsamples[patch_level][0]), int(self.level_downsamples[patch_level][1]))
    ref_patch_size = (patch_size * patch_downsample[0], patch_size * patch_downsample[1])
    step_size_x = step_size * patch_downsample[0]
    step_size_y = step_size * patch_downsample[1]

    if isinstance(contour_fn, str):
        if contour_fn == 'four_pt':
            cont_check_fn = isInContourV3_Easy(contour=cont, patch_size=ref_patch_size[0], center_shift=0.5)
        elif contour_fn == 'four_pt_hard':
            cont_check_fn = isInContourV3_Hard(contour=cont, patch_size=ref_patch_size[0], center_shift=0.5)
        elif contour_fn == 'center':
            cont_check_fn = isInContourV2(contour=cont, patch_size=ref_patch_size[0])
        elif contour_fn == 'basic':
            cont_check_fn = isInContourV1(contour=cont)
        else:
            raise NotImplementedError
    else:
        assert isinstance(contour_fn, Contour_Checking_fn)
        cont_check_fn = contour_fn

    img_w, img_h = self.level_dim[0]
    if use_padding:
        stop_y = start_y + h
        stop_x = start_x + w
    else:
        stop_y = min(start_y + h, img_h - ref_patch_size[1])
        stop_x = min(start_x + w, img_w - ref_patch_size[0])

    count = 0
    for y in range(start_y, stop_y, step_size_y):
        for x in range(start_x, stop_x, step_size_x):
            # Skip points outside the contour or inside one of its holes.
            if not self.isInContours(cont_check_fn, (x, y), holes):
                continue

            count += 1
            patch_PIL = self.wsi.read_region((x, y), patch_level, (patch_size, patch_size)).convert('RGB')
            if custom_downsample > 1:
                patch_PIL = patch_PIL.resize((target_patch_size, target_patch_size))

            if white_black:
                patch_arr = np.array(patch_PIL)
                if isBlackPatch(patch_arr, rgbThresh=black_thresh) or isWhitePatch(patch_arr,
                                                                                   satThresh=white_thresh):
                    continue

            patch_info = {'x': x // (patch_downsample[0] * custom_downsample),
                          'y': y // (patch_downsample[1] * custom_downsample), 'cont_idx': cont_idx,
                          'patch_level': patch_level,
                          'downsample': self.level_downsamples[patch_level], 'downsampled_level_dim': tuple(
                              np.array(self.level_dim[patch_level]) // custom_downsample),
                          'level_dim': self.level_dim[patch_level],
                          'patch_PIL': patch_PIL, 'name': self.name, 'save_path': save_path}
            yield patch_info

    print("patches extracted: {}".format(count))
@staticmethod
def isInHoles(pt, contour_holes):
    """Return True when ``pt`` lies inside (or on the edge of) any hole.

    Args:
        pt: ``(x, y)`` point to test.
        contour_holes: Iterable of hole polygons (array-likes of points).
    """
    # Remember which processes have been seen. The one-time debug print that
    # used this bookkeeping is disabled.
    current_process_id = os.getpid()
    if current_process_id not in WholeSlideImage.printed_processes:
        WholeSlideImage.printed_processes.add(current_process_id)

    pt_tuple = (int(pt[0]), int(pt[1]))  # pointPolygonTest needs a plain 2-tuple

    for index, hole in enumerate(contour_holes):
        hole = np.asarray(hole)
        if hole.size == 0:
            continue  # skip empty holes
        # Normalise to the (N, 1, 2) layout OpenCV expects.
        hole = hole.reshape(-1, 1, 2)
        # OpenCV only accepts int32 or float32 contours.
        if hole.dtype != np.int32 and hole.dtype != np.float32:
            hole = hole.astype(np.int32)

        # Cheap bounding-box rejection before the exact polygon test.
        x, y, w, h = cv2.boundingRect(hole)
        if not (x <= pt[0] <= x + w and y <= pt[1] <= y + h):
            continue

        if cv2.pointPolygonTest(hole, pt_tuple, False) >= 0:
            print(f"Point {pt_tuple} is inside hole {index}.")
            return True
    return False
def isInContours(cont_check_fn, pt, contour_holes=None, patch_size=None):
if cont_check_fn(pt):
if contour_holes and WholeSlideImage.isInHoles(pt, contour_holes):
print(f"Point {pt} is in holes and will be excluded.")
return False # 点在孔洞内
return True # 点在轮廓内且不在孔洞内
return False # 点不在轮廓内
def scaleContourDim(contours, scale):
# 确保scale是浮点数
if isinstance(scale, (list, tuple)):
scale_x, scale_y = scale
scale_x = scale_y = scale
scaled_contours = []
for cont in contours:
#print(f"Processing contour: {cont}") # 添加调试信息
# 检查并转换轮廓点的格式
if isinstance(cont, np.ndarray):
cont = cont.reshape(-1, 2)
scaled_contour = np.array([[int(x * scale_x), int(y * scale_y)] for x, y in cont], dtype='int32').reshape(-1, 1, 2)
except TypeError as e:
print(f"Error processing contour: {e}")
print(f"Contour data: {cont}")
return scaled_contours
def scaleHolesDim(holes, scale):
scale_x, scale_y = scale if isinstance(scale, (list, tuple)) else (scale, scale)
scaled_holes = []
for hole in holes:
hole = np.array(hole).reshape(-1, 2)
scaled_hole = np.array([[int(x * scale_x), int(y * scale_y)] for x, y in hole], dtype='int32').reshape(-1, 1, 2)
return scaled_holes
def _assertLevelDownsamples(self):
level_downsamples = []
dim_0 = self.wsi.level_dimensions[0]
for downsample, dim in zip(self.wsi.level_downsamples, self.wsi.level_dimensions):
estimated_downsample = (dim_0[0]/float(dim[0]), dim_0[1]/float(dim[1]))
level_downsamples.append(estimated_downsample) if estimated_downsample != (downsample, downsample) else level_downsamples.append((downsample, downsample))
return level_downsamples
def process_contours(self, save_path, patch_level=0, patch_size=256, step_size=256, max_holes_to_remove=3,
                     **kwargs):
    """Compute patch coordinates for the first tissue contour and save them.

    Only ``self.contours_tissue[0]`` is processed and every entry of
    ``self.holes_tissue`` is treated as a hole of that contour.

    Args:
        save_path: Directory receiving ``<slide name>.h5``.
        patch_level, patch_size, step_size: Patching geometry.
        max_holes_to_remove: Currently unused.
        **kwargs: Forwarded to ``process_contour``.

    Returns:
        Path of the written ``.h5`` file, or the previous value of
        ``self.hdf5_file`` when nothing was saved.
    """
    save_path_hdf5 = os.path.join(save_path, str(self.name) + '.h5')
    print("Creating patches for: ", self.name, "...")

    if self.contours_tissue is None or len(self.contours_tissue) == 0:
        # No segmentation available: there is nothing to patch.
        print("No data to save.")
        return self.hdf5_file

    # Assume a single contour that owns all holes.
    contour = self.contours_tissue[0]
    contour_holes = self.holes_tissue if self.holes_tissue is not None else []
    n_holes = len(contour_holes)
    print(f"Total number of holes: {n_holes}")

    # Normalise the holes to a flat list of int32 (N, 1, 2) polygons.
    valid_holes = []
    for hole in contour_holes:
        if not isinstance(hole, (list, tuple, np.ndarray)) or len(hole) == 0:
            continue
        # An entry is either one polygon or a group (list) of polygons.
        if isinstance(hole, (list, tuple)) and isinstance(hole[0], np.ndarray):
            polygons = hole
        else:
            polygons = [hole]
        for polygon in polygons:
            polygon = np.asarray(polygon, dtype=np.int32).reshape(-1, 1, 2)
            if polygon.size > 0:
                valid_holes.append(polygon)
    print(f"Remaining valid holes: {len(valid_holes)}")

    # Process the contour together with its holes.
    asset_dict, attr_dict = self.process_contour(contour, valid_holes, patch_level, save_path, patch_size,
                                                 step_size, **kwargs)

    # Persist the coordinates when any were produced.
    if len(asset_dict) > 0:
        save_hdf5(save_path_hdf5, asset_dict, attr_dict, mode='w')
        self.hdf5_file = save_path_hdf5
    else:
        print("No data to save.")
    return self.hdf5_file
def process_contour(self, cont, contour_holes, patch_level, save_path, patch_size=256, step_size=256,
                    contour_fn='four_pt', use_padding=True, top_left=None, bot_right=None, **kwargs):
    """Collect the level-0 patch coordinates lying inside one contour.

    Args:
        cont: Contour as an array-like of points, or ``None`` to use the full
            extent of ``patch_level``.
        contour_holes: Hole polygons to exclude.
        patch_level, patch_size, step_size: Patching geometry.
        save_path: Recorded in the returned attributes.
        contour_fn: ``'four_pt'``, ``'four_pt_hard'``, ``'center'``,
            ``'basic'`` or a ``Contour_Checking_fn`` instance.
        use_padding: Allow patches that extend past the slide border.
        top_left, bot_right: Optional corners restricting the search region.
        **kwargs: Ignored.

    Returns:
        ``(asset_dict, attr_dict)`` with the key ``'coords'`` in both, or
        ``({}, {})`` when no coordinate qualifies.
    """
    if cont is not None:
        cont = np.array(cont, dtype='int32').reshape(-1, 1, 2)
        start_x, start_y, w, h = cv2.boundingRect(cont)
    else:
        start_x, start_y = 0, 0
        w, h = self.level_dim[patch_level][0], self.level_dim[patch_level][1]

    patch_downsample = (int(self.level_downsamples[patch_level][0]), int(self.level_downsamples[patch_level][1]))
    ref_patch_size = (patch_size * patch_downsample[0], patch_size * patch_downsample[1])

    img_w, img_h = self.level_dim[0]
    if use_padding:
        stop_y = start_y + h
        stop_x = start_x + w
    else:
        stop_y = min(start_y + h, img_h - ref_patch_size[1] + 1)
        stop_x = min(start_x + w, img_w - ref_patch_size[0] + 1)

    print("Bounding Box:", start_x, start_y, w, h)
    if cont is not None:
        print("Contour Area:", cv2.contourArea(cont))

    if bot_right is not None:
        stop_y = min(bot_right[1], stop_y)
        stop_x = min(bot_right[0], stop_x)
    if top_left is not None:
        start_y = max(top_left[1], start_y)
        start_x = max(top_left[0], start_x)

    if bot_right is not None or top_left is not None:
        w, h = stop_x - start_x, stop_y - start_y
        if w <= 0 or h <= 0:
            print("Contour is not in specified ROI, skip")
            return {}, {}
        print("Adjusted Bounding Box:", start_x, start_y, w, h)

    if isinstance(contour_fn, str):
        if contour_fn == 'four_pt':
            cont_check_fn = isInContourV3_Easy(contour=cont, patch_size=ref_patch_size[0], center_shift=0.5)
        elif contour_fn == 'four_pt_hard':
            cont_check_fn = isInContourV3_Hard(contour=cont, patch_size=ref_patch_size[0], center_shift=0.5)
        elif contour_fn == 'center':
            cont_check_fn = isInContourV2(contour=cont, patch_size=ref_patch_size[0])
        elif contour_fn == 'basic':
            cont_check_fn = isInContourV1(contour=cont)
        else:
            raise NotImplementedError
    else:
        assert isinstance(contour_fn, Contour_Checking_fn)
        cont_check_fn = contour_fn

    step_size_x = step_size * patch_downsample[0]
    step_size_y = step_size * patch_downsample[1]

    x_range = np.arange(start_x, stop_x, step=step_size_x)
    y_range = np.arange(start_y, stop_y, step=step_size_y)
    x_coords, y_coords = np.meshgrid(x_range, y_range, indexing='ij')
    coord_candidates = np.array([x_coords.flatten(), y_coords.flatten()]).transpose()

    results = []
    if len(coord_candidates) > 0:
        num_workers = min(mp.cpu_count(), 4)
        iterable = [(coord, contour_holes, ref_patch_size[0], cont_check_fn) for coord in coord_candidates]
        # The context manager shuts the workers down once starmap returned,
        # so repeated calls do not accumulate idle processes.
        with mp.Pool(num_workers) as pool:
            results = pool.starmap(WholeSlideImage.process_coord_candidate, iterable)
    results = np.array([result for result in results if result is not None])

    print('Extracted {} coordinates'.format(len(results)))

    if len(results) > 0:
        asset_dict = {'coords': results}
        attr = {'patch_size': patch_size,  # To be considered...
                'patch_level': patch_level,
                'downsample': self.level_downsamples[patch_level],
                'downsampled_level_dim': tuple(np.array(self.level_dim[patch_level])),
                'level_dim': self.level_dim[patch_level],
                'name': self.name,
                'save_path': save_path}
        attr_dict = {'coords': attr}
        return asset_dict, attr_dict
    return {}, {}
def process_coord_candidate(coord, contour_holes, ref_patch_size, cont_check_fn):
x, y = coord
if WholeSlideImage.isInContours(cont_check_fn, (x, y), contour_holes, patch_size=ref_patch_size):
return coord
return None
def visHeatmap(self, scores, coords, vis_level=-1,
               top_left=None, bot_right=None,
               patch_size=(256, 256),
               blank_canvas=False, canvas_color=(220, 20, 50), alpha=0.4,
               blur=False, overlap=0.0,
               segment=True, use_holes=True,
               binarize=False, thresh=0.5,
               *, convert_to_percentiles=False, max_size=None,
               custom_downsample=1, cmap='coolwarm'):
    """Render per-patch attention scores as a heatmap image.

    NOTE(review): the last four parameters are read by the body but were
    missing from the visible signature. They are declared keyword-only so the
    visible positional order is unchanged; their defaults are assumed -
    confirm against the callers.

    Args:
        scores (numpy array of float): Attention scores
        coords (numpy array of int, n_patches x 2): Corresponding coordinates (relative to lvl 0)
        vis_level (int): WSI pyramid level to visualize; a negative value
            picks ``self.wsi.get_best_level_for_downsample(32)``
        top_left, bot_right: optional bounding box (level 0); when both are
            given, scores/coords are filtered to it via ``screen_coords``
        patch_size (tuple of int): Patch dimensions (relative to lvl 0)
        blank_canvas (bool): Whether to use a blank canvas to draw the heatmap (vs. using the original slide)
        canvas_color (tuple of uint8): Canvas color (currently not read; the
            blank canvas is always white)
        alpha (float [0, 1]): blending coefficient for overlaying heatmap onto original slide
        blur (bool): apply gaussian blurring
        overlap (float [0 1]): percentage of overlap between neighboring patches (only affect radius of blurring)
        segment (bool): whether to use tissue segmentation contour (must have already called self.segmentTissue such that
            self.contours_tissue and self.holes_tissue are not None)
        use_holes (bool): whether to also clip out detected tissue cavities (only in effect when segment == True)
        binarize (bool): only display patches > threshold
        thresh (float): binarization threshold; a negative value means
            ``1 / len(scores)``
        convert_to_percentiles (bool): whether to convert attention scores to percentiles
        max_size (int): Maximum canvas size (clip if goes over)
        custom_downsample (int): additionally downscale the heatmap by specified factor
        cmap (str): name of matplotlib colormap to use (a colormap object is
            also accepted and used as-is)

    Returns:
        PIL.Image: the rendered heatmap, after optional blending and resizing.
    """
    if vis_level < 0:
        vis_level = self.wsi.get_best_level_for_downsample(32)

    downsample = self.level_downsamples[vis_level]
    scale = [1 / downsample[0], 1 / downsample[1]]  # Scaling from 0 to desired level

    if len(scores.shape) == 2:
        scores = scores.flatten()

    if binarize:
        if thresh < 0:
            threshold = 1.0 / len(scores)
        else:
            threshold = thresh
    else:
        threshold = 0.0

    ##### calculate size of heatmap and filter coordinates/scores outside specified bbox region #####
    if top_left is not None and bot_right is not None:
        scores, coords = screen_coords(scores, coords, top_left, bot_right)
        coords = coords - top_left
        top_left = tuple(top_left)
        bot_right = tuple(bot_right)
        w, h = tuple((np.array(bot_right) * scale).astype(int) - (np.array(top_left) * scale).astype(int))
        region_size = (w, h)
    else:
        region_size = self.level_dim[vis_level]
        top_left = (0, 0)
        bot_right = self.level_dim[0]
        w, h = region_size

    patch_size = np.ceil(np.array(patch_size) * np.array(scale)).astype(int)
    coords = np.ceil(coords * np.array(scale)).astype(int)

    print('\ncreating heatmap for: ')
    print('top_left: ', top_left, 'bot_right: ', bot_right)
    print('w: {}, h: {}'.format(w, h))
    print('scaled patch size: ', patch_size)

    ###### normalize filtered scores ######
    if convert_to_percentiles:
        scores = to_percentiles(scores)
    # Divide into a new array: an in-place "/=" would overwrite the caller's
    # scores and is rejected by numpy for integer dtypes.
    scores = scores / 100

    ######## calculate the heatmap of raw attention scores (before colormap)
    # by accumulating scores over overlapped regions ######

    # heatmap overlay: tracks attention score over each pixel of heatmap
    # overlay counter: tracks how many times attention score is accumulated over each pixel of heatmap
    # Both are accumulated into with "+=", so they must start at zero
    # (np.empty would leave arbitrary values in them).
    overlay = np.zeros(np.flip(region_size), dtype=float)
    counter = np.zeros(np.flip(region_size), dtype=np.uint16)
    count = 0
    for idx, coord in enumerate(coords):
        score = scores[idx]
        if score >= threshold:
            if binarize:
                score = 1.0
                count += 1
        else:
            score = 0.0
        rows = slice(coord[1], coord[1] + patch_size[1])
        cols = slice(coord[0], coord[0] + patch_size[0])
        # accumulate attention
        overlay[rows, cols] += score
        # accumulate counter
        counter[rows, cols] += 1

    if binarize:
        print('\nbinarized tiles based on cutoff of {}'.format(threshold))
        print('identified {}/{} patches as positive'.format(count, len(coords)))

    # fetch attended region and average accumulated attention
    zero_mask = counter == 0

    if binarize:
        overlay[~zero_mask] = np.around(overlay[~zero_mask] / counter[~zero_mask])
    else:
        overlay[~zero_mask] = overlay[~zero_mask] / counter[~zero_mask]
    del counter

    if blur:
        overlay = cv2.GaussianBlur(overlay, tuple((patch_size * (1 - overlap)).astype(int) * 2 + 1), 0)

    if segment:
        tissue_mask = self.get_seg_mask(region_size, scale, use_holes=use_holes, offset=tuple(top_left))

    if not blank_canvas:
        # downsample original image and use as canvas
        img = np.array(self.wsi.read_region(top_left, vis_level, region_size).convert("RGB"))
    else:
        # use blank canvas
        img = np.array(Image.new(size=region_size, mode="RGB", color=(255, 255, 255)))

    print('\ncomputing heatmap image')
    print('total of {} patches'.format(len(coords)))
    twenty_percent_chunk = max(1, int(len(coords) * 0.2))

    if isinstance(cmap, str):
        cmap = plt.get_cmap(cmap)

    for idx, coord in enumerate(coords):
        if (idx + 1) % twenty_percent_chunk == 0:
            print('progress: {}/{}'.format(idx, len(coords)))

        score = scores[idx]
        if score >= threshold:
            rows = slice(coord[1], coord[1] + patch_size[1])
            cols = slice(coord[0], coord[0] + patch_size[0])

            # attention block
            raw_block = overlay[rows, cols]

            # image block (either blank canvas or orig image)
            img_block = img[rows, cols].copy()

            # color block (cmap applied to attention block)
            color_block = (cmap(raw_block) * 255)[:, :, :3].astype(np.uint8)

            if segment:
                # tissue mask block
                mask_block = tissue_mask[rows, cols]
                # copy over only tissue masked portion of color block
                img_block[mask_block] = color_block[mask_block]
            else:
                # copy over entire color block
                img_block = color_block

            # rewrite image block
            img[rows, cols] = img_block.copy()

    del overlay

    if blur:
        img = cv2.GaussianBlur(img, tuple((patch_size * (1 - overlap)).astype(int) * 2 + 1), 0)

    if alpha < 1.0:
        img = self.block_blending(img, vis_level, top_left, bot_right, alpha=alpha, blank_canvas=blank_canvas, block_size=1024)

    img = Image.fromarray(img)
    w, h = img.size

    if custom_downsample > 1:
        img = img.resize((int(w / custom_downsample), int(h / custom_downsample)))

    # w and h still hold the size from before the custom_downsample resize.
    if max_size is not None and (w > max_size or h > max_size):
        resizeFactor = max_size / w if w > h else max_size / h
        img = img.resize((int(w * resizeFactor), int(h * resizeFactor)))

    return img
def block_blending(self, img, vis_level, top_left, bot_right, alpha=0.5, blank_canvas=False, block_size=1024):
    """Alpha-blend ``img`` with a canvas, one tile at a time.

    The canvas is either the slide region read from ``self.wsi`` at
    ``vis_level`` or a white image, depending on ``blank_canvas``.

    Args:
        img: image array indexed as ``[row, col]``; modified in place.
        vis_level (int): index into ``self.level_downsamples`` and the level
            passed to ``self.wsi.read_region``.
        top_left, bot_right: (x, y) corners of the region in slide
            coordinates; they bound the tile loops.
        alpha (float): weight of ``img`` in the blend (canvas gets
            ``1 - alpha``).
        blank_canvas (bool): blend against a white canvas instead of the slide.
        block_size (int): maximum tile edge in image pixels.

    Returns:
        The same ``img`` array after blending.
    """
    print('\ncomputing blend')
    downsample = self.level_downsamples[vis_level]
    w = img.shape[1]
    h = img.shape[0]
    block_size_x = min(block_size, w)
    block_size_y = min(block_size, h)
    print('using block size: {} x {}'.format(block_size_x, block_size_y))

    shift = top_left  # amount shifted w.r.t. (0,0)
    # Integer downsample factors are loop-invariant; compute them once.
    x_factor = int(downsample[0])
    y_factor = int(downsample[1])
    for x_start in range(top_left[0], bot_right[0], block_size_x * x_factor):
        for y_start in range(top_left[1], bot_right[1], block_size_y * y_factor):
            # 1. convert wsi coordinates to image coordinates via shift and scale
            x_start_img = int((x_start - shift[0]) / x_factor)
            y_start_img = int((y_start - shift[1]) / y_factor)

            # 2. compute end points of blend tile, careful not to go over the edge of the image
            y_end_img = min(h, y_start_img + block_size_y)
            x_end_img = min(w, x_start_img + block_size_x)

            # Zero-area tile: nothing to blend.
            if y_end_img == y_start_img or x_end_img == x_start_img:
                continue

            # 3. fetch blend block and size
            blend_block = img[y_start_img:y_end_img, x_start_img:x_end_img]
            blend_block_size = (x_end_img - x_start_img, y_end_img - y_start_img)

            if not blank_canvas:
                # 4. read actual wsi block as canvas block
                pt = (x_start, y_start)
                canvas = np.array(self.wsi.read_region(pt, vis_level, blend_block_size).convert("RGB"))
            else:
                # 4. OR create blank canvas block
                canvas = np.array(Image.new(size=blend_block_size, mode="RGB", color=(255, 255, 255)))

            # 5. blend color block and canvas block
            img[y_start_img:y_end_img, x_start_img:x_end_img] = cv2.addWeighted(blend_block, alpha, canvas, 1 - alpha, 0, canvas)
    return img
def get_seg_mask(self, region_size, scale, use_holes=False, offset=(0, 0)):
    """Rasterize the stored tissue contours into a boolean mask.

    Args:
        region_size: (width, height) of the mask; it is flipped to
            (height, width) for the array shape.
        scale: per-axis factor passed to ``self.scaleContourDim`` and
            ``self.scaleHolesDim`` and applied to ``offset``.
        use_holes (bool): when True, the hole contours belonging to each
            tissue contour are drawn back as background.
        offset: (x, y) origin of the region; it is scaled and negated before
            being handed to ``cv2.drawContours``.

    Returns:
        numpy bool array of shape (height, width), True where tissue was
        drawn. All False when there are no tissue contours.
    """
    print('\ncomputing foreground tissue mask')
    # Must start at zero: only contour interiors are written below, so with
    # np.empty every other pixel would keep arbitrary memory content and
    # could turn into spurious "tissue" after astype(bool).
    tissue_mask = np.zeros(np.flip(region_size), dtype=np.uint8)
    contours_tissue = self.scaleContourDim(self.contours_tissue, scale)
    offset = tuple((np.array(offset) * np.array(scale) * -1).astype(np.int32))
    contours_holes = self.scaleHolesDim(self.holes_tissue, scale)

    # With no contours, zip(*sorted(...)) yields nothing to unpack and would
    # raise ValueError; an empty mask is the meaningful result instead.
    if len(contours_tissue) > 0:
        # Largest contours first, keeping each contour paired with its holes.
        contours_tissue, contours_holes = zip(*sorted(
            zip(contours_tissue, contours_holes),
            key=lambda pair: cv2.contourArea(pair[0]),
            reverse=True,
        ))
        for idx, holes in enumerate(contours_holes):
            cv2.drawContours(image=tissue_mask, contours=contours_tissue, contourIdx=idx, color=(1), offset=offset, thickness=-1)

            if use_holes:
                cv2.drawContours(image=tissue_mask, contours=holes, contourIdx=-1, color=(0), offset=offset, thickness=-1)

    tissue_mask = tissue_mask.astype(bool)
    print('detected {}/{} of region as tissue'.format(tissue_mask.sum(), tissue_mask.size))
    return tissue_mask