首页 > 其他分享 >驱动蓝屏0x50 PAGE_FAULT_IN_NONPAGED_AREA

驱动蓝屏0x50 PAGE_FAULT_IN_NONPAGED_AREA

时间:2022-11-07 21:39:30浏览次数:51  
标签:AREA 00000000 FAULT memcmp enummod 内存 address 蓝屏 页面


    想写个暴力搜索nt模块中未导出函数的驱动,结果一加载就蓝屏:

// MatchFunc: brute-force search of one loaded-module entry for the byte
// pattern CodeFeature.
//
// ldrDataTabEntry: module list entry; its DllBase, SizeOfImage and
//                  BaseDllName fields are read.
// Returns 0x01 if the entry's BaseDllName equals "ntoskrnl.exe" and the
// pattern is found inside the image, otherwise 0x00.
//
// NOTE(review): no address in the compared range is checked for validity
// before memcmp reads it. The scan walks the whole image linearly, so it
// will read whatever pages lie between DllBase and DllBase + SizeOfImage,
// whether or not they are currently mapped.
unsigned long MatchFunc(PKLDR_DATA_TABLE_ENTRY ldrDataTabEntry)
{
// CodeFeature is declared outside this snippet; the sizeof expression
// presumably yields the pattern length in bytes (assumes CodeFeature is a
// byte array, not a pointer -- TODO confirm).
unsigned long found = 0x00UL, CodeFeatureLen = sizeof(CodeFeature) / sizeof(unsigned char), idx = 0;
UNICODE_STRING matchModName;
char* modBase = (char*)ldrDataTabEntry->DllBase;

RtlInitUnicodeString(&matchModName, L"ntoskrnl.exe");
// Third argument TRUE requests a case-insensitive comparison; a result of 0
// means the two names are equal.
if (RtlCompareUnicodeString(&matchModName, &ldrDataTabEntry->BaseDllName, TRUE) == 0)
{
// The bound uses '<', so the final candidate offset
// (SizeOfImage - CodeFeatureLen) itself is never tested.
// NOTE(review): CodeFeatureLen is unsigned, so the subtraction presumably
// wraps to a huge value if SizeOfImage is smaller than the pattern -- confirm.
for (; idx<(ldrDataTabEntry->SizeOfImage - CodeFeatureLen); idx++)
{
// Reads CodeFeatureLen bytes starting at modBase + idx.
if (memcmp(&modBase[idx], CodeFeature, CodeFeatureLen) == 0)
{
found = 0x01UL;
break;
}
}

}

return found;
}

dump的内容大概如下:

PAGE_FAULT_IN_NONPAGED_AREA (50)
Invalid system memory was referenced. This cannot be protected by try-except.
Typically the address is just plain bad or it is pointing at freed memory.
Arguments:
Arg1: 82d8a000, memory referenced.
Arg2: 00000000, value 0 = read operation, 1 = write operation.
Arg3: 93f3115e, If non-zero, the instruction address which referenced the bad memory
address.
Arg4: 00000000, (reserved)

READ_ADDRESS: 82d8a000

FAULTING_IP:
enummod!memcmp+16 [d:\wbrtm\minkernel\crts\crtw32\string\i386\memcmp.c @ 70]
93f3115e 8b02 mov eax,dword ptr [edx]


TRAP_FRAME: 807e3a08 -- (.trap 0xffffffff807e3a08)
ErrCode = 00000000
eax=82d89ffd ebx=00000000 ecx=00000019 edx=82d89ffd esi=93f33000 edi=00000004
eip=93f3115e esp=807e3a7c ebp=807e3a88 iopl=0 nv up ei pl nz na po nc
cs=0008 ss=0010 ds=0023 es=0023 fs=0030 gs=0000 efl=00010202
enummod!memcmp+0x16:
93f3115e 8b02 mov eax,dword ptr [edx] ds:0023:82d89ffd=????????
Resetting default scope

LAST_CONTROL_TRANSFER: from 82aeae71 to 82a79394

STACK_TEXT:
...
807e3968 82a938e3 00000050 82d8a000 00000000 nt!KeBugCheck2+0x68b
807e39f0 82a545f8 00000000 82d8a000 00000000 nt!MmAccessFault+0x106
807e39f0 93f3115e 00000000 82d8a000 00000000 nt!KiTrap0E+0xdc
807e3a88 93f31128 82d89ffd 93f33000 00000019 enummod!memcmp+0x16 [d:\wbrtm\minkernel\crts\crtw32\string\i386\memcmp.c @ 70]
807e3ab8 93f31093 84f42c98 00220020 93f311de enummod!MatchFunc+0x78 [c:\studio\enummod\enummod\enummod.c @ 49]
807e3ad8 82bb1728 850a0358 850a3000 00000000 enummod!DriverEntry+0x93 [c:\studio\enummod\enummod\enummod.c @ 164]
807e3cbc 82baf499 00000001 00000000 807e3ce4 nt!IopLoadDriver+0x7ed
807e3d00 82a7bf2b 8ce35cd0 00000000 85010020 nt!IopLoadUnloadDriver+0x70
...

FOLLOWUP_IP:
enummod!memcmp+16 [d:\wbrtm\minkernel\crts\crtw32\string\i386\memcmp.c @ 70]
93f3115e 8b02 mov eax,dword ptr [edx]
...

Followup: MachineOwner
---------

kd> .trap 0xffffffff807e3a08
ErrCode = 00000000
eax=82d89ffd ebx=00000000 ecx=00000019 edx=82d89ffd esi=93f33000 edi=00000004
eip=93f3115e esp=807e3a7c ebp=807e3a88 iopl=0 nv up ei pl nz na po nc
cs=0008 ss=0010 ds=0023 es=0023 fs=0030 gs=0000 efl=00010202
enummod!memcmp+0x16:
93f3115e 8b02 mov eax,dword ptr [edx] ds:0023:82d89ffd=????????

kd> !pte 82d89ffd
VA 82d89ffd
PDE at C06020B0 PTE at C0416C48
contains 00000000001D2063 contains 0000000002D89963
pfn 1d2 ---DA--KWEV pfn 2d89 -G-DA--KWEV

kd> !dd 2D89ffd L4
# 2d89ffc 00000000 00000000 00000000 00000000

乍一看错误解释,以为是在高IRQL上引用了分页内存,心里琢磨着什么时候DriverEntry的IRQL>=DISPATCH_LEVEL了?正巧同事经过,看了一眼我的代码说:在比较内存值前先判断一下页面是否有效,应该能解决问题~我那个激动啊,原来如此,三下五除二将代码修改为:

// MatchFunc (second attempt): same brute-force pattern search as before, but
// each candidate start address is passed to a validity check before memcmp.
//
// ldrDataTabEntry: module list entry; its DllBase, SizeOfImage and
//                  BaseDllName fields are read.
// Returns 0x01 if the entry's BaseDllName equals "ntoskrnl.exe" and the
// pattern is found inside the image, otherwise 0x00.
//
// NOTE(review): only the address of the FIRST byte of each candidate is
// validated, while memcmp reads CodeFeatureLen bytes from it. When the start
// lies within the last CodeFeatureLen - 1 bytes of a page, the read extends
// into the following page, which has not been checked.
unsigned long MatchFunc(PKLDR_DATA_TABLE_ENTRY ldrDataTabEntry)
{
// CodeFeature is declared outside this snippet; the sizeof expression
// presumably yields the pattern length in bytes (assumes CodeFeature is a
// byte array, not a pointer -- TODO confirm).
unsigned long found = 0x00UL, CodeFeatureLen = sizeof(CodeFeature) / sizeof(unsigned char), idx = 0;
UNICODE_STRING matchModName;
char* modBase = (char*)ldrDataTabEntry->DllBase;

RtlInitUnicodeString(&matchModName, L"ntoskrnl.exe");
// Third argument TRUE requests a case-insensitive comparison; a result of 0
// means the two names are equal.
if (RtlCompareUnicodeString(&matchModName, &ldrDataTabEntry->BaseDllName, TRUE) == 0)
{
// The bound uses '<', so the final candidate offset
// (SizeOfImage - CodeFeatureLen) itself is never tested.
for (; idx<(ldrDataTabEntry->SizeOfImage - CodeFeatureLen); idx++)
{
// Check that the address is valid before comparing memory.
// NOTE(review): MmIsAddressValidAddr is not defined in this snippet;
// presumably a pointer to (or alias of) MmIsAddressValid -- confirm.
if ((MmIsAddressValidAddr)(&modBase[idx]))
{
// Valid, so go on to compare.
if (memcmp(&modBase[idx], CodeFeature, CodeFeatureLen) == 0)
{
found = 0x01UL;
break;
}
}

}

}

return found;
}

再次加载驱动,程序还是在相同的地方出错了。说不通啊,我已经判断过页面的有效性了,能进到最内层的if进行页面比较就说明内存有效性检测已经通过了,难道是MmIsAddressValid出错了?百度了一圈,发现求助的人不少,更有人说内核API MmIsAddressValid不稳定,还引用了ddk help的原话:"

Even if MmIsAddressValid returns TRUE, accessing the address can cause page faults unless the memory has been locked down or the address is a valid nonpaged pool address."

带着这样的疑惑,我查看了wrk1.2 MmIsAddressValid的实现(我的机器是win7RTM32bit,所以只贴出源码中2级页表映射的内容):

// MiIsAddressValid: reports whether VirtualAddress is currently backed by
// valid page-table entries, judged solely from the Valid bits of its PDE and
// PTE (two-level page-table variant).
//
// VirtualAddress:     address to test.
// UseForceIfPossible: unused in this variant.
// Returns TRUE when the PDE is valid and either maps a large page or leads to
// a valid PTE; FALSE otherwise.
//
// The result is a point-in-time snapshot of the page tables: nothing in this
// routine locks the page, and only the single page containing VirtualAddress
// is examined.
BOOLEAN
MiIsAddressValid (
IN PVOID VirtualAddress,
IN LOGICAL UseForceIfPossible
)

{
PMMPTE PointerPte;
UNREFERENCED_PARAMETER (UseForceIfPossible);

// Reject addresses that fail the reserved-bits / canonical-form check.
if (MI_RESERVED_BITS_CANONICAL(VirtualAddress) == FALSE) {
return FALSE;
}

// Step 1: the page-directory entry must be valid.
PointerPte = MiGetPdeAddress (VirtualAddress);
if (PointerPte->u.Hard.Valid == 0) {
return FALSE;
}

// A valid PDE that maps a large page has no PTE level to inspect.
if (MI_PDE_MAPS_LARGE_PAGE (PointerPte)) {
return TRUE;
}

// Step 2: the page-table entry must be valid.
PointerPte = MiGetPteAddress (VirtualAddress);
if (PointerPte->u.Hard.Valid == 0) {
return FALSE;
}

// Here the large-page test is applied to the PTE itself; a PTE that has the
// bit set is treated as invalid.
// NOTE(review): presumably guards against the bit's different meaning at
// PTE level -- confirm against the WRK sources.
if (MI_PDE_MAPS_LARGE_PAGE (PointerPte)) {
return FALSE;
}

return TRUE;
}

wrk用比较简单的方式检测页面的有效性:检测pte页表项的有效位p位是否置1,置1则认为有效,返回TRUE。这样检测有一个问题:如果代码以下面的流程执行可能会出错:

{
if(MmIsAddressValid(addr)) //在IRQL<DISPATCH_LEVEL时判断页面有效性
{ //通过有效性判断,同时addr被换出内存
KeRaiseIrql(DISPATCH_LEVEL); //页面无法换入
do access addr //访问已经被换出的内存,可能蓝屏
KeLowerIrql();
}
}

附:调换一下代码顺序可能就不会出错了(不过我还没有验证过),

{
{ //先提升IRQL,停止换页线程运行
KeRaiseIrql(DISPATCH_LEVEL);
if(MmIsAddressValid(addr)) //在IRQL>=DISPATCH_LEVEL时判断页面有效性
do access addr //通过有效性检测 这时页面就不会被换出,可以大胆的使用
KeLowerIrql();
}
}


可是,我的代码并没有提升IRQL,就算访问了已被换出的页面,OS也会负责将页面重新换入。在这样的背景下,通过了if(MmIsAddressValid(addr))有效性检测之后,不应该再访问到无效的页面。最后,我在一个群里得到了比较靠谱的答案:nt模块有些节被标记为INIT或者Discardable,这些节的页面在系统初始化完成后可能已经被丢弃,如果程序访问了这样的页面也会触发缺页异常。我查看了一下nt模块的PE信息,的确发现有很多被标记为Discardable的节:

kd> !dh nt

SECTION HEADER #13
PAGEVRFD name
B18 virtual size
37B000 virtual address <-------------蓝屏时显示我正在访问的内存所在的节 mov eax,dword ptr [edx] ds:0023:82d81ffd=????????
C00 size of raw data
330A00 file pointer to raw data
0 file pointer to relocation table
0 file pointer to line numbers
0 number of relocations
0 number of line numbers
C0000040 flags
Initialized Data
(no align specified)
Read Write

SECTION HEADER #14
INIT name
44638 virtual size
37C000 virtual address
44800 size of raw data
331600 file pointer to raw data
0 file pointer to relocation table
0 file pointer to line numbers
0 number of relocations
0 number of line numbers
E2000020 flags
Code
Discardable <-------------------------------
(no align specified)
Execute Read Write

SECTION HEADER #16
.reloc name
1976C virtual size
3F6000 virtual address
19800 size of raw data
3AA400 file pointer to raw data
0 file pointer to relocation table
0 file pointer to line numbers
0 number of relocations
0 number of line numbers
42000040 flags
Initialized Data
Discardable <-------------------------------
(no align specified)
Read Only

另外,我查看了虚拟地址0x82d81ffd处的pte,发现虽然内存0x82d81ffd未被映射,但它的pte是有效的,比较奇怪:

kd> dd 82d81ffd L1
82d81ffd ????????
kd> !pte 82d81ffd
VA 82d81ffd
PDE at C06020B0 PTE at C0416C08
contains 00000000001D2063 contains 0000000002D81963
pfn 1d2 ---DA--KWEV pfn 2d81 -G-DA--KWEV

了解了这个知识点,我再次对程序进行了修改:先判断是否能获得物理内存,如果返回值为零,即可以获得物理内存再进行内存比较。但这种方式不尽如人意,依然触发了异常,异常位置还是在memcmp处。很无奈,我只能再次分析dump文件,再次定位访问出错时CPU将要执行的指令:

mov     eax,dword ptr [edx]  ds:0023:82d81ffd=????????

指令访问地址0x82d81ffd,这个地址正好位于两个节的交界处:从该地址读取4字节(一个dword)就会跨过页边界,从Section Header #13进入到Section Header #14,而Section Header #14的内存页面是标记为Discardable的。由此,我猜想我在暴力搜索内存时只判断了搜索起始位置的有效性,并未对结束位置的有效性进行判断;如果搜索的起始点在页末最后几个字节,而结束位置位于这种被标记为Discardable的页面中,就很可能出错。

kd> !dh nt

SECTION HEADER #13
PAGEVRFD name
B18 virtual size
37B000 virtual address <----页面起始位于82a06000+37B000,页面对齐
...
Initialized Data
(no align specified)
Read Write

SECTION HEADER #14
INIT name
44638 virtual size
37C000 virtual address <----页面起始位于82a06000+37C000,页面对齐
...
Discardable <-------------------------------
(no align specified)
Execute Read Write

kd> !pte 82a06000+37B000 <----#13节的页面对应pte是有效的
VA 82d81000
PDE at C06020B0 PTE at C0416C08
contains 00000000001D2063 contains 0000000002D81963
pfn 1d2 ---DA--KWEV pfn 2d81 -G-DA--KWEV

kd> !pte 82a06000+37C000 <-----#14节的页面对应pte是全0pte
VA 82d82000
PDE at C06020B0 PTE at C0416C10
contains 00000000001D2063 contains 0000000000000000
pfn 1d2 ---DA--KWEV not valid

BugCheck 50, {82d82000, 0, 9453015e, 0} <-----BugCheck 0x50 Arg1 表明蓝屏时访问的内存地址 82d82000=82a06000+37C000 正好是#14节的开始

结合上面的分析,验证了我的猜想。这次我同时验证了待比较内存区域的首尾是否都有效,再次加载驱动,果然没有再发生蓝屏。


标签:AREA,00000000,FAULT,memcmp,enummod,内存,address,蓝屏,页面
From: https://blog.51cto.com/u_13927568/5831382

相关文章