首页 > 其他分享 >工作量统计

工作量统计

时间:2024-05-25 16:11:33浏览次数:20  
标签:name point dest self 工作量 file var 统计

代码量390行,能讲明白,代码如下:

`import json
import re
import sys
import tkinter as tk
from tkinter import filedialog, messagebox

# 预处理函数 (preprocessing functions)

def remove_comments(source_code):
    """Strip C/C++ comments from ``source_code`` and return the result.

    Both ``// line`` comments and ``/* block */`` comments are removed.
    Newlines that terminate a line comment are kept, so line structure is
    preserved for line comments; a multi-line block comment collapses to
    nothing.

    NOTE: the patterns are purely textual, so comment markers that appear
    inside string literals (e.g. ``"http://x"``) are stripped as well.
    """
    # '.' does not match a newline, so this stops at the end of the line.
    source_code = re.sub(r'//.*', '', source_code)
    # DOTALL lets '.' cross newlines; the lazy '*?' stops at the first '*/'.
    source_code = re.sub(r'/\*.*?\*/', '', source_code, flags=re.DOTALL)
    return source_code

def process_macros(code):
    """Delete every ``#define`` directive from ``code``.

    Macros are not expanded; the directive text is simply removed. The
    newline that ended the directive stays in place.
    """
    # MULTILINE makes '^' match at the start of every line, not just the text.
    pattern = r'^\s*#define\s+.+'
    return re.sub(pattern, '', code, flags=re.MULTILINE)

def normalize_whitespace(source_code):
    """Collapse each run of spaces/tabs in ``source_code`` to a single space.

    Newlines are left untouched, so the line count does not change.
    """
    return re.sub(r'[ \t]+', ' ', source_code)

def simplify_conditions(source_code):
    """Rewrite ``if (name == true)`` as ``if (name)``.

    Only a single identifier compared against the literal ``true`` is
    recognised; every other condition is returned unchanged.
    """
    # Group 1 captures the identifier being compared with `true`.
    pattern = r'\bif\s*\(\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*==\s*true\s*\)'
    simplified = r'if (\g<1>)'
    return re.sub(pattern, simplified, source_code)

def remove_redundant_parentheses(code):
    """Normalise ``( word )`` to ``(word)``.

    Despite the name, no parentheses are dropped: the function only removes
    the whitespace padding around a single word enclosed in parentheses.
    Text that is not a parenthesised single word is left unchanged.
    """
    # Group 1 is the bare word between the parentheses.
    pattern = r'\(\s*(\w+)\s*\)'
    return re.sub(pattern, r'(\1)', code)

def extract_identifiers(code):
    """Return every identifier-shaped token in ``code``, in order of appearance.

    Keywords are included and duplicates are kept; tokens that start with a
    digit are not matched.
    """
    pattern = r'\b([a-zA-Z_][a-zA-Z0-9_]*)\b'
    return re.findall(pattern, code)

def guess_headers(identifiers):
    """Guess which standard headers the given identifiers require.

    Matching is by substring: an identifier containing ``print`` or ``scanf``
    selects ``<stdio.h>``; otherwise one containing ``malloc`` or ``free``
    selects ``<stdlib.h>``.

    Returns:
        A set of header names written with their angle brackets.
    """
    headers = set()
    for identifier in identifiers:
        if 'print' in identifier or 'scanf' in identifier:
            headers.add('<stdio.h>')
        elif 'malloc' in identifier or 'free' in identifier:
            headers.add('<stdlib.h>')
    return headers

def complete_headers(code):
    """Prepend ``#include`` lines for the headers ``code`` appears to need.

    The headers are chosen by ``guess_headers`` from the identifiers found by
    ``extract_identifiers``. The include lines are followed by a newline and
    then the original code; when no header is guessed the result is the code
    preceded by a single newline.
    """
    identifiers = extract_identifiers(code)
    # Sort so the emitted order does not depend on set iteration order.
    needed_headers = sorted(guess_headers(identifiers))
    header_directives = ['#include {}'.format(header) for header in needed_headers]
    return '\n'.join(header_directives) + '\n' + code

# 主处理逻辑 (main processing logic)

# Library functions the analyzer treats as overflow-prone. Only membership in
# this mapping is tested by the parser.
# NOTE(review): the integer values look like argument counts — confirm intent.
dangerous_fun = {
    "gets": 1,
    "strcpy": 2,
    "strcat": 2,
    "fgets": 3,
    "strncpy": 3,
    "strncat": 3,
}
# Parsed program model, keyed by function name.
program = dict()
# Per-function list of variable records copied out of `program`.
variables = dict()
# (name, offset) pairs describing the stack frame, filled in by load_vars().
stack = list()
# Vulnerability records collected during an analysis run.
vulns = list()

# 特征提取 (feature extraction)

def parse_c_file(file_content):
    """Populate ``program["main"]`` from C source text.

    The text is scanned line by line:

    * An array declaration of a basic type (``char buf[16];`` and similar)
      becomes a variable record with a synthetic address ``rbp-10``,
      ``rbp-20``, ... (hex, 16 bytes apart, in declaration order). The
      declared array size is matched by the pattern but not used.
    * A call statement ending in ``;`` whose function name is a key of
      ``dangerous_fun`` becomes a ``call`` instruction with a synthetic
      address starting at ``0x400000`` and advancing by ``0x20`` per call.

    Only the first declaration and the first call on each line are seen.
    Any previous ``program["main"]`` entry is replaced.
    """
    global program
    program["main"] = {"variables": [], "instructions": []}
    variables_section = program["main"]["variables"]
    instructions_section = program["main"]["instructions"]

    variable_pattern = re.compile(r'(?:char|int|long|short|float|double)\s+(\w+)\s*\[\s*(\d+)?\s*\];?')
    call_pattern = re.compile(r'(\w+)\s*\(([^)]*)\)\s*;')

    address_counter = 0x400000  # Start address (can be adjusted as necessary)
    for line in file_content.split('\n'):
        var_match = variable_pattern.search(line)
        if var_match:
            var_name = var_match.group(1)
            # Every variable gets its own 16-byte slot below rbp.
            address_offset = len(variables_section) * 16 + 16
            var_addr = f"rbp-{address_offset:x}"  # lower-case hex offset
            variables_section.append({
                "name": var_name,
                "address": var_addr
            })
            print(f"Variable parsed: {var_name} at {var_addr}")

        call_match = call_pattern.search(line)
        if call_match:
            fn_name = call_match.group(1)
            if fn_name in dangerous_fun:
                instructions_section.append({
                    "op": "call",
                    "args": {
                        "fnname": f"<{fn_name}@plt>"
                    },
                    "address": f"0x{address_counter:x}"
                })
                print(f"Function call parsed: {fn_name} at address 0x{address_counter:x}")
                address_counter += 0x20  # Increment for next function address

def load_vars():
    """Rebuild the ``variables`` index and the global ``stack`` layout.

    For every function in ``program`` its variable records are copied into
    ``variables[function]``. Each ``rbp-<hex>`` address is converted to a
    negative integer offset; together with the fixed entries ``rbp`` (0) and
    ``ret`` (8) these form ``stack``, a list of ``(name, offset)`` tuples
    sorted by ascending offset.
    """
    global stack
    stack_dict = {"rbp": 0, "ret": 8}

    for function in program:
        variables[function] = []
        for variable in program[function]['variables']:
            variables[function].append(variable)
            address_match = re.search(r'rbp-(.*)', variable['address'])
            if address_match:
                address = address_match.group(1)
                # The text after "rbp-" is a hex distance below rbp.
                stack_dict[variable['name']] = -int(address, 16)
                print(f"Variable address loaded: {variable['name']} -> {variable['address']}")

    stack = sorted(stack_dict.items(), key=lambda x: x[1])
    print(f"Loaded stack: {stack}")

def getVarFromAddr(rbp_addr):
    """Return the variable record whose address equals ``rbp_addr``.

    The comparison is case-insensitive and searches every function in
    ``variables``. Each candidate checked is logged with ``print``.

    Returns:
        The matching variable dict, or ``None`` when nothing matches.
    """
    for fun in variables:
        for var in variables[fun]:
            print(f"Checking address {rbp_addr} against variable {var['name']} with address {var['address']}")
            if var['address'].lower() == rbp_addr.lower():
                return var
    return None

def getStartingPoint(name):
    """Return the offset stored in ``stack`` for ``name``.

    Returns:
        The second item of the first ``stack`` entry named ``name``, or
        ``None`` when no entry has that name.
    """
    for element in stack:
        if element[0] == name:
            return element[1]
    return None

def updateStack(name, value):
    """Set the offset of every ``stack`` entry named ``name`` to ``value``.

    The list is modified in place and is not re-sorted. The resulting stack
    is printed once after the update.
    """
    for i in range(len(stack)):
        if stack[i][0] == name:
            # Entries are tuples, so build a replacement rather than mutate.
            stack[i] = (stack[i][0], value)
    print(f"Updated stack: {stack}")

def simplify():
    """Flatten the ``call`` instructions in ``program`` into one list.

    Returns:
        A flat list laid out in groups of three:
        ``[function, fnname, address, function, fnname, address, ...]``,
        one group per instruction whose ``op`` is ``"call"``.
    """
    s = []
    for fun in program:
        for ins in program[fun]["instructions"]:
            if ins["op"] == "call":
                s.append(fun)
                s.append(ins["args"]["fnname"])
                s.append(ins["address"])
                print(f"Instruction simplified: {fun} {ins['args']['fnname']} {ins['address']}")
    print(f"Simplified instruction set: {s}")
    return s

def _flag_overwritten_slots(dest_var, starting_point, ending_point, fnname, addr,
                            inclusive_end=False, trace=False):
    """Record a vulnerability for each stack slot a write runs over.

    A slot is hit when its offset lies above ``starting_point`` and, if
    ``ending_point`` is not ``None``, below it (``<=`` when ``inclusive_end``
    is true, ``<`` otherwise). The destination variable itself is skipped.
    With ``trace`` set, every slot examined is printed first.
    """
    for element in stack:
        if trace:
            print(f"Checking element {element} in stack for overflow comparison")
        offset = element[1]
        if offset <= starting_point:
            continue
        if ending_point is not None:
            beyond = offset > ending_point if inclusive_end else offset >= ending_point
            if beyond:
                continue
        if element[0] == dest_var['name']:
            continue
        vuln_name = getVulnName(offset)
        print(f"Adding vulnerability {vuln_name} for {dest_var['name']} overflow to {element[0]}")
        addVuln(vuln_name, dest_var["name"], element[0], fnname, addr, fnname)


def _close_bounded_write(dest_var, ending_point, fnname, addr):
    """Report a write ending at/after offset 16 and move the destination's offset."""
    if ending_point >= 16:
        # No overflown variable applies here; addVuln only stores one for
        # VAROVERFLOW, and offsets >= 16 map to SCORRUPTION.
        addVuln(getVulnName(ending_point), dest_var["name"], None, fnname, addr, fnname)
    updateStack(dest_var['name'], ending_point)


def process(simple):
    """Evaluate each dangerous call in ``simple`` and record vulnerabilities.

    Args:
        simple: flat list produced by ``simplify()``; items are consumed in
            groups of three as ``(function, fnname, address)``.

    The operand locations are hard-coded per callee rather than taken from
    the call's arguments:

    * ``gets``: destination ``rbp-10``, unbounded write. Every slot above
      the destination is flagged and a ``SCORRUPTION`` entry is always added.
    * ``fgets``: destination ``rbp-10``, fixed length 100.
    * ``strcpy`` / ``strcat`` / ``strncat``: destination ``rbp-20``, source
      ``rbp-30``; the length is the absolute value of the source's offset.
    * ``strncpy``: destination ``rbp-40``, source ``rbp-50``, fixed ``n = 20``.

    A call whose operand variables cannot be found is skipped with an error
    message. Results are appended to ``vulns`` through ``addVuln``; bounded
    writes also update the destination's offset in ``stack``.
    """
    for i in range(0, len(simple), 3):
        fnname = simple[i + 1]
        addr = simple[i + 2]

        if fnname == "<gets@plt>":
            dest_var_addr = 'rbp-10'
            dest_var = getVarFromAddr(dest_var_addr)
            print(f"Processing gets@plt, dest_var: {dest_var}")
            if not dest_var:
                print(f"Error: No variable found for address {dest_var_addr}")
                continue

            starting_point = getStartingPoint(dest_var['name'])
            print(f"Starting point for {dest_var['name']}: {starting_point}")

            # gets() has no length limit, so there is no upper bound.
            _flag_overwritten_slots(dest_var, starting_point, None, fnname, addr)
            addVuln("SCORRUPTION", dest_var["name"], None, fnname, addr, fnname)

        elif fnname == "<fgets@plt>":
            dest_var_addr = 'rbp-10'
            dest_var = getVarFromAddr(dest_var_addr)
            print(f"Processing fgets@plt, dest_var: {dest_var}, buffer size: 100")
            if not dest_var:
                print(f"Error: No variable found for address {dest_var_addr}")
                continue

            starting_point = getStartingPoint(dest_var['name'])
            print(f"Starting point for {dest_var['name']}: {starting_point}")
            ending_point = starting_point + 100
            print(f"Ending point for fgets: {ending_point}")

            _flag_overwritten_slots(dest_var, starting_point, ending_point, fnname, addr)
            _close_bounded_write(dest_var, ending_point, fnname, addr)

        elif fnname in ("<strcpy@plt>", "<strcat@plt>", "<strncat@plt>"):
            dest_var_addr = 'rbp-20'
            source_var_addr = 'rbp-30'

            dest_var = getVarFromAddr(dest_var_addr)
            source_var = getVarFromAddr(source_var_addr)
            print(f"Processing {fnname}, dest_var: {dest_var}, source_var: {source_var}")

            if not dest_var or not source_var:
                print(f"Error: Missing variable for addresses dest: {dest_var_addr}, source: {source_var_addr}")
                continue

            starting_point = getStartingPoint(dest_var['name'])
            print(f"Starting point for {dest_var['name']}: {starting_point}")

            init_size = getStartingPoint(source_var['name'])
            ending_point = starting_point + abs(init_size)
            print(f"Ending point for {fnname}: {ending_point}")

            _flag_overwritten_slots(dest_var, starting_point, ending_point, fnname, addr,
                                    inclusive_end=True, trace=True)
            _close_bounded_write(dest_var, ending_point, fnname, addr)

        elif fnname == "<strncpy@plt>":
            dest_var_addr = 'rbp-40'
            source_var_addr = 'rbp-50'
            n = 20

            dest_var = getVarFromAddr(dest_var_addr)
            source_var = getVarFromAddr(source_var_addr)
            print(f"Processing strncpy@plt, dest_var: {dest_var}, source_var: {source_var}")

            if not dest_var or not source_var:
                print(f"Error: Missing variable for addresses dest: {dest_var_addr}, source: {source_var_addr}")
                continue

            starting_point = getStartingPoint(dest_var['name'])
            print(f"Starting point for {dest_var['name']}: {starting_point}")
            ending_point = starting_point + n
            print(f"Ending point for strncpy: {ending_point}")

            _flag_overwritten_slots(dest_var, starting_point, ending_point, fnname, addr,
                                    trace=True)
            _close_bounded_write(dest_var, ending_point, fnname, addr)

def getVulnName(ending_point):
    """Classify a stack offset into a vulnerability name.

    Returns:
        ``'VAROVERFLOW'`` for negative offsets, ``'RBPOVERFLOW'`` for 0-7,
        ``'RETOVERFLOW'`` for 8-15 and ``'SCORRUPTION'`` for 16 and above.
    """
    if ending_point < 0:
        return 'VAROVERFLOW'
    if ending_point < 8:
        return 'RBPOVERFLOW'
    if ending_point < 16:
        return 'RETOVERFLOW'
    return 'SCORRUPTION'

def addVuln(vulnerability, overflow_var, overflown_var, vuln_function, address, fnname):
    """Build a vulnerability record, print it and append it to ``vulns``.

    Args:
        vulnerability: one of the names returned by ``getVulnName``.
        overflow_var: name of the buffer being written.
        overflown_var: name of the overwritten slot; stored only for
            ``'VAROVERFLOW'``.
        vuln_function: stored unchanged under ``'vuln_function'``.
        address: stored unchanged under ``'address'``.
        fnname: callee in ``<name@plt>`` form; the bare name is stored.
    """
    v = {
        'vulnerability': vulnerability,
        'overflow_var': overflow_var,
        'vuln_function': vuln_function,
        'address': address,
        # Drop the leading "<" and the trailing "@plt>" (five characters).
        'fnname': fnname[1:-5],
    }

    if vulnerability == 'VAROVERFLOW':
        v['description'] = 'Overflow into adjacent variable.'
        v['overflown_var'] = overflown_var
    elif vulnerability == 'SCORRUPTION':
        v['description'] = "Stack corruption beyond saved frame for main."
        v['overflown_address'] = "rbp+0x10"
    elif vulnerability == 'RBPOVERFLOW':
        v['description'] = "Overflow into saved base pointer (rbp)."
    elif vulnerability == 'RETOVERFLOW':
        v['description'] = "Overflow into return address."

    print(f"Vulnerability added: {v}")
    vulns.append(v)

def analyze_file(filepath):
    """Run the whole pipeline on the C source file at ``filepath``.

    The file is read as UTF-8, preprocessed (comments and ``#define`` lines
    removed, whitespace and simple conditions normalised, guessed headers
    prepended), parsed into ``program``, and its dangerous calls evaluated.
    Intermediate results are printed along the way.

    Returns:
        The list of vulnerability records found in this run. The global
        ``vulns`` is rebound to a fresh list at the start of every call.
    """
    global vulns
    vulns = []

    with open(filepath, 'r', encoding='utf-8') as infile:
        file_content = infile.read()

    # Preprocessing
    file_content = remove_comments(file_content)
    file_content = process_macros(file_content)
    file_content = normalize_whitespace(file_content)
    file_content = simplify_conditions(file_content)
    file_content = remove_redundant_parentheses(file_content)
    file_content = complete_headers(file_content)

    print(f"Processed code:\n{file_content}\n")

    parse_c_file(file_content)
    print(f"Parsed program: {program}")
    load_vars()
    simple = simplify()
    print(f"Simplified instructions: {simple}")
    process(simple)

    print(f"Detected vulnerabilities: {vulns}")
    return vulns

class BufferOverflowAnalyzerGUI(tk.Tk):
    """Tk main window: pick a C/C++ file, analyze it, and list the findings."""

    def __init__(self):
        """Build the window: title labels, file chooser row and result pane."""
        super().__init__()

        self.title("缓冲区溢出漏洞检测")
        self.geometry("800x500")

        label = tk.Label(self, text = "缓冲区溢出漏洞检测",font=('宋体', '20',))
        label.pack()
        label = tk.Label(self, text = "请选择待检测的代码文件",font=('宋体', '13',))
        label.pack(pady=10)

        file_frame = tk.Frame(self)
        file_frame.pack(pady=10)

        self.select_file_button = tk.Button(file_frame, text="选择文件", command=self.select_file)
        self.select_file_button.pack(side=tk.LEFT)

        self.file_path_label = tk.Label(file_frame, text="", wraplength=500)
        self.file_path_label.pack(side=tk.LEFT, padx=10)

        self.start_analysis_button = tk.Button(file_frame, text="开始检测", command=self.analyze_selected_file)
        self.start_analysis_button.pack(side=tk.LEFT, padx=10)

        self.result_text = tk.Text(self, height=20, wrap='word')
        self.result_text.pack(padx=20, pady=20, expand=True, fill='both')

        # The scrollbar is a child of the text widget and drives its y-view.
        self.scrollbar = tk.Scrollbar(self.result_text)
        self.scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        self.result_text.config(yscrollcommand=self.scrollbar.set)
        self.scrollbar.config(command=self.result_text.yview)

        # Path chosen through select_file(); None until the user picks one.
        self.selected_file = None

    def select_file(self):
        """Ask the user for a ``.c``/``.cpp`` file and remember the choice."""
        file_path = filedialog.askopenfilename(filetypes=[("C and C++ Files", "*.c *.cpp")])
        if file_path:
            self.selected_file = file_path
            self.file_path_label.config(text=file_path)

    def analyze_selected_file(self):
        """Analyze the selected file and show the results.

        Shows an error dialog when no file has been selected or when the
        file cannot be read as UTF-8 text.
        """
        if not self.selected_file:
            messagebox.showerror("错误", "没有选择文件!")
            return

        try:
            results = analyze_file(self.selected_file)
        except (OSError, UnicodeDecodeError) as exc:
            # Otherwise the failure only appears on stderr of the Tk callback.
            messagebox.showerror("错误", f"无法读取文件: {exc}")
            return
        print(f"Analysis results: {results}")
        self.display_results(results)

    def display_results(self, results):
        """Replace the result pane's content with a listing of ``results``.

        Args:
            results: list of vulnerability dicts as built by ``addVuln``.
        """
        self.result_text.delete("1.0", tk.END)

        if not results:
            self.result_text.insert(tk.END, "未找到缓冲区溢出漏洞.\n")
            return

        for vuln in results:
            self.result_text.insert(tk.END, f"漏洞类型: {vuln['vulnerability']}\n")
            self.result_text.insert(tk.END, f"产生漏洞的函数: {vuln['vuln_function']}\n")
            self.result_text.insert(tk.END, f"溢出变量: {vuln.get('overflow_var', 'N/A')}\n")
            self.result_text.insert(tk.END, f"被覆盖的变量/超出地址: {vuln.get('overflown_var', vuln.get('overflown_address', 'N/A'))}\n")
            self.result_text.insert(tk.END, f"地址: {vuln['address']}\n")
            self.result_text.insert(tk.END, f"危险函数: {vuln['fnname']}\n")
            self.result_text.insert(tk.END, f"描述: {vuln.get('description', 'No description available')}\n")
            self.result_text.insert(tk.END, "-"*40 + "\n")

# Start the GUI only when the file is executed as a script, not on import.
if __name__ == "__main__":
    app = BufferOverflowAnalyzerGUI()
    app.mainloop()
`

标签:name,point,dest,self,工作量,file,var,统计
From: https://www.cnblogs.com/hsjy-love/p/18212536

相关文章

  • (十)统计学基础练习题四(50道选择题)
    本文整理了统计学基础知识相关的练习题,共50道,适用于想巩固统计学基础或备考的同学。来源:如荷学数据科学题库(技术专项-统计学一)。序号之前的题请看往期文章。151) 152) 153) 154) 155) 156) 157) 158) 159) 160) 161) 162) 163) 164) ......
  • (九)统计学基础练习题三(50道选择题)
    本文整理了统计学基础知识相关的练习题,共50道,适用于想巩固统计学基础或备考的同学。来源:如荷学数据科学题库(技术专项-统计学一)。序号之前的题请看往期文章。101)102)103)104)105)106)107)108)109)110)111)112)113)114)115)116)117)118)119)120)121)122)......
  • 【Python数据分析】统计学 四分位距
    四分位距 四分位距(interquartile range,IQR)是描述统计学中的一种方法,用以确定第三四分位数和第一四分位数的差(即......
  • 数据分析面试常问问题(一)(SQL、统计学、业务方面等)
    一、SQL1.union和unionall的区别二者都是SQL中用于合并两个或多个SELECT语句的关键字,区别在于如何处理重复行1️⃣union去除重复行,返回唯一的行2️⃣unionall不去除重复的行,所有行都保留2.几种窗口函数及区别1️⃣ROW_NUMBER()为结果集中的每一行分配一个唯一的整数值,按照指定......
  • 计算机Java项目|Springboot教师工作量管理系统
    作者主页:编程指南针作者简介:Java领域优质创作者、CSDN博客专家、CSDN内容合伙人、掘金特邀作者、阿里云博客专家、51CTO特邀作者、多年架构师设计经验、腾讯课堂常驻讲师主要内容:Java项目、Python项目、前端项目、人工智能与大数据、简历模板、学习资料、面试题库、技术互......
  • 系统统计
    系统设计系统总体功能结构设计1.用户管理模块:包括管理员和学生用户的注册、登录、密码找回等功能。2.社团信息管理模块:用于添加、修改、删除和查询社团的基本信息。3.成员管理模块:用于管理社团成员信息,包括学生申请加入、退出社团的功能,并提供审批功能供社团负责人审核。4.......
  • 蓝桥杯-日志统计
    小明维护着一个程序员论坛。现在他收集了一份”点赞”日志,日志共有N行。其中每一行的格式是:tsid表示在ts时刻编号id的帖子收到一个”赞”。现在小明想统计有哪些帖子曾经是”热帖”。如果一个帖子曾在任意一个长度为D的时间段内收到不少于K个赞,小明就认为这个帖......
  • 逗号分开的字符串,统计个数从高到底排序
    usesWinapi.Windows,Winapi.Messages,System.SysUtils,System.Variants,System.Classes,Vcl.Graphics,System.RegularExpressions, functionCompareStrings(List:TStringList;Index1,Index2:Integer):Integer;beginResult:=StrToInt(List.ValueF......
  • 如何利用 Seaborn 实现高级统计图表
    本文分享自华为云社区《使用Seaborn实现高级统计图表从箱线图到多变量关系探索》,作者:柠檬味拥抱。在数据科学和数据可视化领域,Seaborn是一个备受欢迎的Python可视化库。它建立在Matplotlib的基础之上,提供了更简洁、更美观的图形界面,同时也具备了一些高级统计图表的功能。......
  • 短语挖掘与流行度、一致性及信息度评估:基于文本挖掘与词频统计|附数据代码
    全文链接:https://tecdat.cn/?p=36193原文出处:拓端数据部落公众号在信息爆炸的时代,文本数据呈现出爆炸式的增长,从新闻报道、社交媒体到学术论文,无处不在的文本信息构成了我们获取知识和理解世界的重要来源。然而,如何从海量的文本数据中提取有价值的信息,尤其是那些能够反映主题、......