TaskMatrix
https://github.com/chenfei-wu/TaskMatrix
TaskMatrix
TaskMatrix connects ChatGPT and a series of Visual Foundation Models to enable sending and receiving images during chatting.
Insight & Goal:
On the one hand, ChatGPT (or LLMs in general) serves as a general interface that provides a broad and diverse understanding of a wide range of topics. On the other hand, Foundation Models serve as domain experts by providing deep knowledge in specific domains. By leveraging both general and deep knowledge, we aim to build an AI that is capable of handling a wide variety of tasks.
https://arxiv.org/pdf/2303.16434
https://github.com/chenfei-wu/TaskMatrix
class BackgroundRemoving:
    """Remove the background of a picture, keeping only its main object."""

    # Truthy marker that ConversationBot looks for when it scans module
    # globals for models to build from already-loaded models.
    template_model = True

    # NOTE: the parameter names below must stay equal to the class names;
    # ConversationBot compares them against the names of the loaded model
    # classes and passes the instances in by keyword.
    def __init__(self, VisualQuestionAnswering: VisualQuestionAnswering, Text2Box: Text2Box, Segmenting: Segmenting):
        """Keep the VQA model and build an ObjectSegmenting helper.

        Args:
            VisualQuestionAnswering: model instance exposing ``inference``.
            Text2Box: forwarded to ``ObjectSegmenting``.
            Segmenting: forwarded to ``ObjectSegmenting``.
        """
        self.obj_segmenting = ObjectSegmenting(Text2Box, Segmenting)
        self.vqa = VisualQuestionAnswering

    @prompts(name="Remove the background",
             description="useful when you want to extract the object or remove the background,"
                         "the input should be a string image_path"
             )
    def inference(self, image_path):
        """Save a copy of the image whose alpha channel is the main-object mask.

        Args:
            image_path (string): the file path of the input image

        Returns:
            The path of the newly written image, as produced by
            ``get_new_image_name``.
        """
        object_mask = self.get_mask(image_path)
        picture = Image.open(image_path)
        # The mask becomes the alpha band of the picture.
        picture.putalpha(Image.fromarray(object_mask))
        output_path = get_new_image_name(image_path, func_name="detect-something")
        picture.save(output_path)
        return output_path

    def get_mask(self, image_path):
        """Return the mask of the main object in the image.

        The VQA model is asked to name the main object, and that answer is
        used as the text prompt for the segmenting helper.

        Args:
            image_path (string): the file path of the image

        Returns:
            mask (numpy.ndarray): H x W
        """
        question = f"{image_path}, what is the main object in the image?"
        main_object = self.vqa.inference(question)
        return self.obj_segmenting.get_mask(image_path, main_object)
class ConversationBot:
    """Conversational agent that exposes the loaded visual models as tools.

    The constructor instantiates the requested models, then any "template"
    models whose constructor arguments are all available, and registers every
    ``inference*`` method of every model as an agent tool.
    """

    # Paths of generated images as they appear in the agent's text output.
    # Raw string with an escaped dot so only a literal ".png" suffix matches.
    _IMAGE_PATH_PATTERN = re.compile(r'(image/[-\w]*\.png)')

    def __init__(self, load_dict):
        """Load the models and build the tool list, LLM and memory.

        Args:
            load_dict: mapping of model class name to device, e.g.
                ``{'VisualQuestionAnswering': 'cuda:0', 'ImageCaptioning': 'cuda:1', ...}``.
                Each key is looked up in this module's globals.

        Raises:
            ValueError: if ``'ImageCaptioning'`` is not a key of ``load_dict``.
        """
        print(f"Initializing VisualChatGPT, load_dict={load_dict}")
        if 'ImageCaptioning' not in load_dict:
            raise ValueError("You have to load ImageCaptioning as a basic function for VisualChatGPT")

        self.models = {}
        # Load Basic Foundation Models
        for class_name, device in load_dict.items():
            self.models[class_name] = globals()[class_name](device=device)

        # Load Template Foundation Models
        for class_name, module in globals().items():
            if getattr(module, 'template_model', False):
                # A template model's __init__ parameters are named after the
                # model classes it needs.
                template_required_names = {
                    k for k in inspect.signature(module.__init__).parameters if k != 'self'
                }
                # Recomputed on every pass because self.models grows as
                # template models are added.
                loaded_names = {type(e).__name__ for e in self.models.values()}
                if template_required_names.issubset(loaded_names):
                    self.models[class_name] = module(
                        **{name: self.models[name] for name in template_required_names})

        print(f"All the Available Functions: {self.models}")

        self.tools = []
        for instance in self.models.values():
            for e in dir(instance):
                if e.startswith('inference'):
                    func = getattr(instance, e)
                    self.tools.append(Tool(name=func.name, description=func.description, func=func))
        self.llm = OpenAI(temperature=0)
        self.memory = ConversationBufferMemory(memory_key="chat_history", output_key='output')

    def init_agent(self, lang):
        """Reset the memory and (re)create the agent for the chosen language.

        Args:
            lang: ``'English'`` selects the English prompts; any other value
                selects the Chinese ones.

        Returns:
            Four ``gr.update`` objects: ``visible=True``, ``visible=False``,
            the input placeholder text, and the clear-button label.
        """
        self.memory.clear()  # clear previous history
        if lang == 'English':
            PREFIX, FORMAT_INSTRUCTIONS, SUFFIX = VISUAL_CHATGPT_PREFIX, VISUAL_CHATGPT_FORMAT_INSTRUCTIONS, VISUAL_CHATGPT_SUFFIX
            place = "Enter text and press enter, or upload an image"
            label_clear = "Clear"
        else:
            PREFIX, FORMAT_INSTRUCTIONS, SUFFIX = VISUAL_CHATGPT_PREFIX_CN, VISUAL_CHATGPT_FORMAT_INSTRUCTIONS_CN, VISUAL_CHATGPT_SUFFIX_CN
            place = "输入文字并回车,或者上传图片"
            label_clear = "清除"
        self.agent = initialize_agent(
            self.tools,
            self.llm,
            agent="conversational-react-description",
            verbose=True,
            memory=self.memory,
            return_intermediate_steps=True,
            agent_kwargs={'prefix': PREFIX, 'format_instructions': FORMAT_INSTRUCTIONS,
                          'suffix': SUFFIX}, )
        return gr.update(visible=True), gr.update(visible=False), gr.update(placeholder=place), gr.update(value=label_clear)

    @staticmethod
    def _embed_image_links(text):
        """Replace each image path in *text* with a markdown image plus an italic caption."""
        return ConversationBot._IMAGE_PATH_PATTERN.sub(
            lambda m: f'![](file={m.group(0)})*{m.group(0)}*', text)

    @staticmethod
    def _fit_to_grid(width, height, max_side=512, grid=64):
        """Scale (width, height) so the longer side is *max_side*, snapped to *grid*.

        Each side is rounded to the nearest multiple of *grid* but never below
        one grid cell, so a very elongated image cannot collapse to a zero
        dimension (which would make the resize fail).
        """
        ratio = min(max_side / width, max_side / height)
        scaled = (round(width * ratio), round(height * ratio))
        return tuple(max(grid, round(side / grid) * grid) for side in scaled)

    def run_text(self, text, state):
        """Send a user message to the agent and append the exchange to *state*.

        Args:
            text: the user's message; surrounding whitespace is stripped
                before it is passed to the agent.
            state: list of ``(user, bot)`` pairs; not mutated, a new list is
                returned.

        Returns:
            The updated state, twice.
        """
        self.agent.memory.buffer = cut_dialogue_history(self.agent.memory.buffer, keep_last_n_words=500)
        res = self.agent({"input": text.strip()})
        # Normalise path separators so image paths use forward slashes.
        res['output'] = res['output'].replace("\\", "/")
        response = self._embed_image_links(res['output'])
        state = state + [(text, response)]
        print(f"\nProcessed run_text, Input text: {text}\nCurrent state: {state}\n"
              f"Current Memory: {self.agent.memory.buffer}")
        return state, state

    def run_image(self, image, state, txt, lang):
        """Store an uploaded image, caption it, and tell the agent about it.

        Args:
            image: object whose ``name`` attribute is the path of the upload.
            state: list of ``(user, bot)`` pairs; not mutated.
            txt: current text of the input box.
            lang: ``'Chinese'`` selects the Chinese prompt; any other value
                selects the English one.

        Returns:
            The updated state, twice, followed by *txt* with the new image
            filename appended.
        """
        image_filename = os.path.join('image', f"{str(uuid.uuid4())[:8]}.png")
        # Make sure the output directory exists before saving into it.
        os.makedirs(os.path.dirname(image_filename), exist_ok=True)
        print("======>Auto Resize Image...")
        # The context manager closes the uploaded file once it has been read;
        # resize() and convert() return new, independent images.
        with Image.open(image.name) as img:
            width, height = img.size
            width_new, height_new = self._fit_to_grid(width, height)
            img = img.resize((width_new, height_new)).convert('RGB')
        img.save(image_filename, "PNG")
        print(f"Resize image form {width}x{height} to {width_new}x{height_new}")
        description = self.models['ImageCaptioning'].inference(image_filename)
        if lang == 'Chinese':
            Human_prompt = f'\nHuman: 提供一张名为 {image_filename}的图片。它的描述是: {description}。 这些信息帮助你理解这个图像,但是你应该使用工具来完成下面的任务,而不是直接从我的描述中想象。 如果你明白了, 说 \"收到\". \n'
            AI_prompt = "收到。 "
        else:
            Human_prompt = f'\nHuman: provide a figure named {image_filename}. The description is: {description}. This information helps you to understand this image, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\". \n'
            AI_prompt = "Received. "
        # The exchange is written straight into the memory buffer rather than
        # being sent through the agent.
        self.agent.memory.buffer = self.agent.memory.buffer + Human_prompt + 'AI: ' + AI_prompt
        state = state + [(f"![](file={image_filename})*{image_filename}*", AI_prompt)]
        print(f"\nProcessed run_image, Input image: {image_filename}\nCurrent state: {state}\n"
              f"Current Memory: {self.agent.memory.buffer}")
        return state, state, f'{txt} {image_filename} '
标签:name,image,TaskMatrix,state,agent,path,self From: https://www.cnblogs.com/lightsong/p/18448877