gv$active_session_history会每秒钟将数据库所有节点的Active Session采样一次,而dba_hist_active_sess_history则会将gv$active_session_history里的数据每10秒采样一次并持久化保存。
基于这个特征,我们可以通过分析dba_hist_active_sess_history的Session采样情况,来定位问题发生的准确时间范围,并且可以观察每个采样点的top event和top holder。
- 一些容易误解含义和需要关注的列说明
SEQ#
sid和serial#相同的event并不代表是同一个event等待,有时候查看等待事件需要关注这个列,这列值相同代表相同的event上的等待。
*
SESSION_STATE
waiting代表等待,此时结合event才可以说明等待在这个等待事件上,否则则是on cpu。这个问题在v$session上会经常出现,v$session.state不是waiting时,event代表上个已经经历过的等待,需要注意区分。
*
WAIT_TIME
当等于0时,表示当前在等待。当这个值是非0值时,代表上个event的等待时间。
*
TIME_WAITED
结合session_state, 当session_states值是waiting时,代表花在等待上的时间,单位是microseconds,如果采样的数据不止一行,则在最后一行统计累积时间。
*
BLOCKING_SESSION、BLOCKING_INST_ID
阻塞者会话和阻塞者实例,对于有些等待这个值是不准的,还是要根据具体的P1和P2值的含义去计算
*
TOP_LEVEL_SQL_ID
top level调用,有时候可以根据这个列查出是什么存储过程中调用的SQL。
- 导出ASHDUMP
依据gv$active_session_history
create table m_ash_0728 tablespace USERS as
Select *
From gv$active_session_history
Where sample_time Between
to_timestamp('2017/07/28 13:00', 'yyyy/mm/dd hh24:mi') And
to_timestamp('2017/07/28 15:00', 'yyyy/mm/dd hh24:mi');
create table m_sqltext tablespace users as
Select * From gv$sql;
*
依据dba_hist_active_sess_history
create table m_ash_0728 tablespace USERS as
Select *
From dba_hist_active_sess_history
Where sample_time Between
to_timestamp('2017/07/28 13:00', 'yyyy/mm/dd hh24:mi') And
to_timestamp('2017/07/28 15:00', 'yyyy/mm/dd hh24:mi');
create table m_sqltext tablespace users as
Select * From dba_hist_sqltext;
导出
exp userid=\'/ as sysdba\' file=ashdata.dmp tables='m_ash_0728','m_sqltext'
导入
imp \'/ as sysdba\' file=ashdata.dmp log=ashdata.log full=y
- 常用确认问题流程
- 查看ASH时间范围
set line 160 pages 999
col min_time for a25
col max_time for a25
alter session set nls_timestamp_format='yyyy-mm-dd hh24:mi:ss.ff';
Select instance_number,
Min(sample_time) min_time,
Max(sample_time) max_time,
Count(*) session_count
From m_ash_0803
Group By instance_number
Order By instance_number;
- 确认问题发生的精确时间范围
set line 160 pages 999
col sample_time for a25
alter session set nls_timestamp_format='yyyy-mm-dd hh24:mi:ss.ff';
Select instance_number, sample_id, sample_time, Count(*) session_count
From m_ash_0722_2 t
Group By instance_number, sample_id, sample_time
Order By instance_number, sample_time;
- 确定每个采样点的top n event
Select t.sample_id,
t.sample_time,
t.instance_number,
t.event,
t.session_state,
t.c session_count
From (Select t.*,
rank() over(Partition By instance_number, sample_time Order By c Desc) r
From (Select /*+ parallel(8) */
t.*,
Count(*) over(Partition By instance_number, sample_time, event) c,
row_number() over(Partition By instance_number, sample_time, event Order By 1) r1
From m_ash_2 t
Where sample_time >
to_timestamp('2017-07-22 01:25:01',
'yyyy-mm-dd hh24:mi:ss')
And sample_time <
to_timestamp('2017-07-22 02:42:05',
'yyyy-mm-dd hh24:mi:ss')) t
Where r1 = 1) t
Where r < 5
Order By instance_number, sample_time, r;
这个例子是TOP 4
- 观察每个采样点的等待链
Select /*+ parallel(8) */
Level lv,
connect_by_isleaf isleaf,
connect_by_iscycle iscycle,
t.sample_id,
t.sample_time,
t.instance_number,
t.session_id,
t.sql_id,
t.session_type,
t.event,
t.session_state,
t.blocking_inst_id,
t.blocking_session,
t.blocking_session_status
From m_ash_0722_2 t
Where sample_time >
to_timestamp('2017-07-22 22.02.08', 'yyyy-mm-dd hh24:mi:ss')
And sample_time <
to_timestamp('2017-07-22 22.32.31', 'yyyy-mm-dd hh24:mi:ss')
Start With blocking_session Is Not Null
Connect By nocycle
Prior sample_time = sample_time
/*and ((prior sample_time) - sample_time between interval '-1'
second and interval '1' second)*/
And Prior blocking_inst_id = instance_number
And Prior blocking_session = session_id
And Prior blocking_session_serial# = session_serial#
Order Siblings By sample_time;
- 基于上一步的原理来找出每个采样点的最终top holder
Select t.lv,
t.iscycle,
t.sample_id,
t.sample_time,
t.instance_number,
t.session_id,
t.sql_id,
t.session_type,
t.event,
t.seq#,
t.session_state,
t.blocking_inst_id,
t.blocking_session,
t.blocking_session_status,
t.c blocking_session_count
From (Select t.*,
row_number() over(Partition By instance_number, sample_time Order By c Desc) r
From (Select t.*,
Count(*) over(Partition By instance_number, sample_time, session_id) c,
row_number() over(Partition By instance_number, sample_time, session_id Order By 1) r1
From (Select /*+ parallel(8) */
Level lv,
connect_by_isleaf isleaf,
connect_by_iscycle iscycle,
t.*
From m_ash_0722_2 t
Where sample_time >
to_timestamp('2017-07-22 22.02.08',
'yyyy-mm-dd hh24:mi:ss')
And sample_time <
to_timestamp('2017-07-22 22.32.31',
'yyyy-mm-dd hh24:mi:ss')
Start With blocking_session Is Not Null
Connect By nocycle
Prior sample_time = sample_time
/*and ((prior sample_time) - sample_time between interval '-1'
second and interval '1' second)*/
And Prior blocking_inst_id = instance_number
And Prior blocking_session = session_id
And Prior
blocking_session_serial# = session_serial#) t
Where t.isleaf = 1) t
Where r1 = 1) t
Where r < 5
Order By sample_time, r;
- 整体一个时间段的等待链
With ash As
(Select /*+ materialize*/
*
From m_ash_0722_2
Where sample_time >
to_timestamp('2017-07-22 22:02:00', 'yyyy-mm-dd hh24:mi:ss')
And sample_time <
to_timestamp('2017-07-22 22:35:00', 'yyyy-mm-dd hh24:mi:ss')),
chains As
(Select instance_number,
session_id,
Level lvl,
sys_connect_by_path('inst ' || instance_number || ' ' || sql_id || ' ' ||
event,
' -> ') path,
connect_by_isleaf isleaf
From ash
Start With event = 'log file sync'
Connect By nocycle(Prior blocking_session = session_id
And Prior blocking_session_serial# = session_serial#
And Prior blocking_inst_id = instance_number
And Prior sample_id = sample_id
/*And ((Prior sample_time) - sample_time Between Interval '-1'
Second And Interval '1' Second)*/))
Select instance_number,
lpad(round(ratio_to_report(Count(*)) over() * 100) || '%', 5, ' ') "%This",
Count(*) samples,
path
From chains
Where isleaf = 1
Group By instance_number, path
Order By samples desc;
- 举例
基于ash查询enq: TX - row lock contention(类似队列锁性质的都可以用这个方法)
- 查看snap_id
select snap_id,
to_char(begin_interval_time, 'yyyymmdd hh24:mi:ss') begin_interval_time
from wrm$_snapshot
where begin_interval_time between to_date('20151029 19', 'yyyymmdd hh24') and
to_date('20151029 20', 'yyyymmdd hh24')
and instance_number = 1
order by 2;
- 根据snap_id, 结果按时间排序
select to_char(sample_time, 'yyyymmdd hh24:mi:ss'), event, count(*)
from dba_hist_active_sess_history
where snap_id = 188
and wait_class <> 'Idle'
and event = 'enq: TX - row lock contention'
and instance_number = 2
group by to_char(sample_time, 'yyyymmdd hh24:mi:ss'), event
order by 1;
- 代入上一步的时间点,查询blocking_session并按count排序
select blocking_session, count(*), event
from dba_hist_active_sess_history
where snap_id = 188
and to_char(sample_time, 'yyyymmdd hh24:mi:ss' = '20150612 12:00:06'
and wait_class <> 'Idle'
and event = 'enq: TX - row lock contention'
group by blocking_session, event
order by 2;
- 代入查到的sid,看在snap范围的blocking_session经历的event
select session_id,
user_id,
sql_id,
blocking_session,
event,
to_char(sample_time, 'yyyymmdd hh24:mi:ss')
from dba_hist_active_sess_history
where snap_id = 188
and to_char(sample_time, 'yyyymmdd hh24:mi:ss') <= '20150612 12:00:06'
and to_char(sample_time, 'yyyymmdd hh24:mi:ss') >= '20150612 11:00:26'
and wait_class <> 'Idle'
and instance_number = 2
and session_id = 2622;
- 发生阻塞的主要SQL
with tmp as
(select /*+ materialize */distinct sql_id sqlid
from dba_hist_active_sess_history
where snap_id = 188
and to_char(sample_time, 'yyyymmdd hh24:mi:ss') <= '20170301 08:43:45'
and to_char(sample_time, 'yyyymmdd hh24:mi:ss') >= '20170301 08:04:39'
and wait_class <> 'Idle'
and instance_number = 2
and event = 'enq: TX - row lock contention')
select distinct parsing_schema_name, sql_id, sql_text
from v$sql
where sql_id in (select sqlid from tmp);
标签:总结,分析方法,number,sample,instance,session,time,ASH,id
From: https://blog.51cto.com/u_13482808/6473044