[20240827]分析为什么出现library cache lock等待事件2.txt
--//前几天一直在分析如果表不存在的情况下,密集执行为什么出现library cache lock等待事件,而且出现的mode=2(共享模式),按照道
--//理不应该阻塞,做一个分析.
1.环境:
SCOTT@book01p> @ver2
==============================
PORT_STRING : x86_64/Linux 2.4.xx
VERSION : 21.0.0.0.0
BANNER : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
BANNER_FULL : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
Version 21.3.0.0.0
BANNER_LEGACY : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
CON_ID : 0
PL/SQL procedure successfully completed.
2.首先定位该对象handle address,编写gdb脚本:
$ cat lcl.gdb
set $lk = 0
break kgllkal
commands
silent
printf "kgllkal count %02d -- handle address: %016x, mode: %d ", ++$lk ,$rdx ,$rcx
echo kglnaobj address:
x/s $rdx+0x1c8
finish
end
--//session 1:
SCOTT@book01p(290,36393)> @ spid
SID SERIAL# PROCESS SERVER SPID PID P_SERIAL# C50
---- ------- ------- --------- ---- --- ---------- --------------------------------------------------
290 36393 3803 DEDICATED 3805 62 2 alter system kill session '290,36393' immediate;
--//window 1: (注:表示打开新的bash shell终端,与上面区分开来)
$ rlgdb -f -p 3805 -x lcl.gdb
...
--//session 1:
SCOTT@book01p(290,36393)> select count(1) from deptxxx;
--//window 1:按不断c继续:
(gdb) c
Continuing.
kgllkal count 04 -- handle address: 000000006a65df50, mode: 2 kglnaobj address:0x6a65e118: "aa0b8a5e997323ce2d65f9b7af91ed4f$BUILD$BOOK01P"
0x0000000015363ad0 in kglLock ()
--//出现"aa0b8a5e997323ce2d65f9b7af91ed4f$BUILD$BOOK01P"记下handle address: 000000006a65df50.
--//退出gdb界面.
--//修改lcl.gdb文件;
$ cat lcl.gdb
set $lk = 0
break kgllkal if ( $rdx==0x000000006a65df50 )
#break kgllkal
commands
silent
printf "kgllkal count %02d -- handle address: %016x, mode: %d ", ++$lk ,$rdx ,$rcx
echo kglnaobj address:
x/s $rdx+0x1c8
finish
end
--//window 1,再次重启gdb检测:
$ rlgdb -f -p 3805 -x lcl.gdb
--//session 2:
SCOTT@book01p(151,22503)> @ spid
SID SERIAL# PROCESS SERVER SPID PID P_SERIAL# C50
---- -------- ------- --------- ---- --- ---------- --------------------------------------------------
151 22503 4065 DEDICATED 4067 65 2 alter system kill session '151,22503' immediate;
--//window 2:
--//window 1检测session 1.window 2检测session 2.
$ rlgdb -f -p 4067 -x lcl.gdb
--//session 1:
SCOTT@book01p(290,36393)> select count(1) from deptxxx;
--//挂起!!
--//window 1,按c继续:
Breakpoint 1 at 0x15367e90
(gdb) c
Continuing.
kgllkal count 01 -- handle address: 000000006a65df50, mode: 2 kglnaobj address:0x6a65e118: "aa0b8a5e997323ce2d65f9b7af91ed4f$BUILD$BOOK01P"
0x0000000015363ad0 in kglLock ()
--//停在kglLock.
--//session 3:
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));
SID KGLLKMOD KGLLKREQ KGLNAOBJ KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
290 2 0 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
--//session 2:
SCOTT@book01p(151,22503)> select count(1) from deptxxx;
--//挂起!!
--//window 2,按c继续:
Breakpoint 1 at 0x15367e90
(gdb) c
Continuing.
kgllkal count 01 -- handle address: 000000006a65df50, mode: 2 kglnaobj address:0x6a65e118: "aa0b8a5e997323ce2d65f9b7af91ed4f$BUILD$BOOK01P"
0x0000000015363ad0 in kglLock ()
--//停在kglLock.
--//session 3:
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));
SID KGLLKMOD KGLLKREQ KGLNAOBJ KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
290 2 0 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
151 2 0 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
--//现在sid=151,290同时持有00000000701E7A28的library cache lock,mode=2,说明共享模式没有问题.
SYS@book> @ ashtop event,p1,p1hex,p3hex "event='library cache lock'" &10s
no rows selected
--//现在并没有阻塞.出现等待事件在library cache lock.
--//window 1,按c继续:
(gdb) c
Continuing.
--//session 3:
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));
SID KGLLKMOD KGLLKREQ KGLNAOBJ KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
290 0 3 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
151 2 0 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
--//sid=290开始释放library cache lock,但是还没有释放,KGLLKMOD=0,KGLLKREQ=3,请求mode=3.
--//window 1,如果等几十秒,你可以发现如下信息;
(gdb) c
Continuing.
Program received signal SIGUSR2, User defined signal 2.
0x00007f65c72a0fca in semtimedop () at ../sysdeps/unix/syscall-template.S:81
/usr/src/debug/glibc-2.17-c758a686/sysdeps/unix/syscall-template.S:81:3374:beg:0x7f65c72a0fca
--//sesson 1进入休眠模式,注意信号semtimedop.
(gdb) bt 8
#0 0x00007f65c72a0fca in semtimedop () at ../sysdeps/unix/syscall-template.S:81
#1 0x0000000015339805 in sskgpwwait ()
#2 0x0000000015334595 in skgpwwait ()
#3 0x000000001498b09e in ksliwat ()
#4 0x000000001498a4ba in kslwaitctx ()
#5 0x000000000591f453 in kglLockWait ()
#6 0x00000000058f9e65 in kglUpgradeLock ()
#7 0x0000000003ffce7c in kksGetBuildLock ()
(More stack frames follow...)
--//session 3:
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));
SID KGLLKMOD KGLLKREQ KGLNAOBJ KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
290 0 3 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
151 2 0 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
SYS@book> @ ashtop event,p1,p1hex,p3hex,sid 1=1 &10s
Total Distinct Distinct Distinct
Seconds AAS %This EVENT P1 P1HEX P3HEX SID FIRST_SEEN LAST_SEEN Execs Seen Tstamps Execs Seen1
--------- ------- ------- ------------------------------------------ ---------- ----------------- ----------------- ---------- ------------------- ------------------- ---------- -------- -----------
10 1.0 37% | library cache lock 1785061200 000000006A65DF50 0000000000520003 290 2024-08-27 10:00:27 2024-08-27 10:00:36 1 10 10
10 1.0 37% | 1413697536 151 2024-08-27 10:00:27 2024-08-27 10:00:36 1 10 10
5 .5 19% | 100 132 2024-08-27 10:00:27 2024-08-27 10:00:36 1 5 5
2 .2 7% | 100 261 2024-08-27 10:00:29 2024-08-27 10:00:34 1 2 2
--//实际上现在sid=290,P3HEX=0000000000520003,也就是释放时是尝试持有mode=3,排他模式.
SYS@book> @ wcx &10s
-- Display ASH Wait Chain Signatures script v0.7 by Tanel Poder ( http://blog.tanelpoder.com )
%This SECONDS AAS WAIT_CHAIN FIRST_SEEN LAST_SEEN
------ ---------- ------- ------------------------------------------------------------------------ ------------------- -------------------
38% 10 1.0 -> ,,@=>151,22503,@1=> 2024-08-27 10:00:55 2024-08-27 10:01:04
38% 10 1.0 -> 151,22503,@1=>290,36393,@1=>library cache lock -> ,,@=>151,22503,@1=> 2024-08-27 10:00:55 2024-08-27 10:01:04
19% 5 .5 -> ,,@=>132,3926,@1=> 2024-08-27 10:00:57 2024-08-27 10:01:02
4% 1 .1 -> ,,@=>270,42455,@1=> 2024-08-27 10:01:04 2024-08-27 10:01:04
--//这时等待链条是 151->290->library cache lock->151.阻塞者是sid=290.
--//window 2,按c继续:
(gdb) c
Continuing.
Program received signal SIGUSR2, User defined signal 2.
0x0000000015363ad0 in kglLock ()
--//session 2停在kglLock,到session 2释放library cache lock.
(gdb) c
Continuing.
--//等一小段时间...
Program received signal SIGUSR2, User defined signal 2.
0x00007fd3f17fdfca in semtimedop () at ../sysdeps/unix/syscall-template.S:81
/usr/src/debug/glibc-2.17-c758a686/sysdeps/unix/syscall-template.S:81:3374:beg:0x7fd3f17fdfca
--//sesson 2进入休眠模式,注意信号semtimedop.
--//session 3:
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));
SID KGLLKMOD KGLLKREQ KGLNAOBJ KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
151 0 2 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
290 3 0 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
--//这时sid=290持有mode=3.sid=151的KGLLKREQ=2.
--//为什么这时sid=151的KGLLKREQ=3呢?也许sid=290,持有mode=3,只能请求mode=2.
SYS@book> @ ashtop event,p1,p1hex,p3hex,sid 1=1 &10s
Total Distinct Distinct Distinct
Seconds AAS %This EVENT P1 P1HEX P3HEX SID FIRST_SEEN LAST_SEEN Execs Seen Tstamps Execs Seen1
--------- ------- ------- ------------------------------------------ ---------- ----------------- ----------------- ---------- ------------------- ------------------- ---------- -------- -----------
10 1.0 45% | library cache lock 1785061200 000000006A65DF50 0000000000520002 151 2024-08-27 10:05:47 2024-08-27 10:05:56 1 10 10
10 1.0 45% | library cache lock 1785061200 000000006A65DF50 0000000000520003 290 2024-08-27 10:05:47 2024-08-27 10:05:56 1 10 10
1 .1 5% | 3 5 2024-08-27 10:05:54 2024-08-27 10:05:54 1 1 1
1 .1 5% | 100 132 2024-08-27 10:05:53 2024-08-27 10:05:53 1 1 1
--//sid=151 看到P3hex=0000000000520002.
--//window 1,按c继续:
(gdb) c
Continuing.
--//session 1:
SCOTT@book01p(290,36393)> select count(1) from deptxxx;
select count(1) from deptxxx
*
ERROR at line 1:
ORA-00942: table or view does not exist
--//报错,执行完成!!
--//session 3:
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));
SID KGLLKMOD KGLLKREQ KGLNAOBJ KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
151 2 0 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
--//sid=151 现在持有mode=2.
--//window 2,按c继续:
(gdb) c
Continuing.
--//session 2:
SCOTT@book01p(151,22503)> select count(1) from deptxxx;
select count(1) from deptxxx
*
ERROR at line 1:
ORA-00942: table or view does not exist
--//报错,执行完成!!
--//session 3:
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));
no rows selected
3.简单总结:
--//实际上问题在于后续的kglLock函数调用,当存在多个持有mode=2的情况,释放lock,请求的mode=3排他模式方式,以前一直以为kgllkal
--//持有的mode=2,不会存在阻塞.问题实际上在于后续的操作.
--//更正:实际上即使一个会话,也是请求mode=3的排他模式.
$ head -4 m2.sh ;tail -4 m2.sh
#! /bin/bash
sleep $(echo $1/500 | bc -l )
sqlplus -s -l scott/book@book01p <<EOF >/dev/null
set feedback off
/
/
quit
EOF
--//中间20000个/.
$ zzdate ; seq 10 | xargs -IQ -P 5 ./m2.sh Q ;zzdate
trunc(sysdate)+10/24+43/1440+01/86400 == 2024/08/27 10:43:01
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));
SID KGLLKMOD KGLLKREQ KGLNAOBJ KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
145 0 2 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
277 0 2 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
397 0 2 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
24 0 2 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
30 3 0 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));
SID KGLLKMOD KGLLKREQ KGLNAOBJ KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
17 0 2 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
24 0 2 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
397 0 2 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
145 0 3 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50
277 2 0 aa0b8a5e997323ce2d65f9b7af91ed4f 000000006A65DF50