CDH集群日志清理
一、查看磁盘占用情况
df -h
二、查看日志占用情况
CDH 的各种组件日志一般在 /var/log 目录下,因此需要关注“/”根目录所在磁盘的使用情况(本文后续清理命令使用的路径是 /data/var/log,请按实际环境的日志目录调整)。
查看 /var/log 下占用空间较大的文件夹,并由大到小排列:
cd /var/log/
du -s ./* |sort -nr
另外,Cloudera Management Service 服务产生的数据存放在 /data/var/lib/ 下(例如 cloudera-service-monitor),同样按占用空间由大到小查看:
cd /data/var/lib/cloudera-service-monitor
du -s ./* | sort -nr
三、清理日志
清理cm、cdh组件的日志数据
# Remove rotated CM / CDH component logs. Every pattern requires a further
# suffix after .out / .log / .audit (e.g. foo.out.1), so a file named exactly
# foo.out or foo.log is not matched. "--" ends option parsing before the paths.
rm -rf -- /data/var/log/cloudera-scm-eventserver/*.out.*
rm -rf -- /data/var/log/cloudera-scm-firehose/*.out.*
rm -rf -- /data/var/log/cloudera-scm-agent/*.log.*
rm -rf -- /data/var/log/cloudera-scm-agent/*.out.*
rm -rf -- /data/var/log/cloudera-scm-server/*.out.*
rm -rf -- /data/var/log/cloudera-scm-server/*.log.*
rm -rf -- /data/var/log/hadoop-hdfs/*.out.*
rm -rf -- /data/var/log/hadoop-hdfs/*.audit.*
rm -rf -- /data/var/log/hadoop-httpfs/*.out.*
rm -rf -- /data/var/log/hadoop-kms/*.out.*
rm -rf -- /data/var/log/hadoop-mapreduce/*.out.*
rm -rf -- /data/var/log/hadoop-yarn/*.out.*
rm -rf -- /data/var/log/flume-ng/*.out.*
rm -rf -- /data/var/log/solr/*.out.*
rm -rf -- /data/var/log/solr/solr_gc.log.*
rm -rf -- /data/var/log/zookeeper/*.log.*
rm -rf -- /data/var/log/impalad/*.log.*
# The two commands below remove cached/project data, not log files.
rm -rf -- /data/yarn/nm/usercache/*/filecache/*
# NOTE(review): this deletes everything under azkaban/projects - confirm the
# contents are disposable before running it.
rm -rf -- /data/azkaban/projects/*
清理监控服务的数据
rm -rf /data/var/lib/cloudera-host-monitor/ts/*/partition*/*
rm -rf /data/var/lib/cloudera-service-monitor/ts/*/partition*/*
清理hdfs回收站数据
-- 查看回收站文件大小
hadoop fs -du -h -s /user/*/.Trash/*
-- 清理回收站内容(以 root 用户执行,需两步:第一步将其他账号回收站中的内容移动到 root 的回收站;第二步清理 root 回收站中的内容)
hadoop fs -rm -r /user/*/.Trash/*
hadoop fs -rm -r /user/root/.Trash/Current
四、自动化脚本
vim cleanLog.sh
#!/bin/bash
# cleanLog.sh - free disk space under /data by removing rotated CM / CDH
# component logs, monitor service data and local caches.
#
# Every log glob below requires a further suffix after .out / .log / .audit
# (e.g. foo.out.1), so a file named exactly foo.out or foo.log is not matched.
# rm -f keeps a pattern that matches nothing from being reported as an error.
# Only POSIX sh syntax is used, so the script also works when it is started
# as `sh cleanLog.sh`.

data_root=/data
log_root="${data_root}/var/log"

# Monitor service data (ts/*/partition*/*) of the host and service monitors.
# "${var:?}" aborts instead of expanding to a bare "/..." path if the
# variable is ever empty.
for monitor in cloudera-host-monitor cloudera-service-monitor; do
  rm -rf -- "${data_root:?}/var/lib/${monitor}"/ts/*/partition*/*
done

# Components whose *.out.* files are removed.
for component in \
  cloudera-scm-eventserver \
  cloudera-scm-firehose \
  cloudera-scm-agent \
  cloudera-scm-server \
  hadoop-hdfs \
  hadoop-httpfs \
  hadoop-kms \
  hadoop-mapreduce \
  hadoop-yarn \
  flume-ng \
  solr; do
  rm -rf -- "${log_root:?}/${component}"/*.out.*
done

# Components whose *.log.* files are removed.
for component in cloudera-scm-agent cloudera-scm-server zookeeper impalad; do
  rm -rf -- "${log_root:?}/${component}"/*.log.*
done

# Patterns that apply to a single component only.
rm -rf -- "${log_root:?}"/hadoop-hdfs/*.audit.*
rm -rf -- "${log_root:?}"/solr/solr_gc.log.*

# Per-user filecache directories under the YARN NodeManager path.
rm -rf -- "${data_root:?}"/yarn/nm/usercache/*/filecache/*

# NOTE(review): this deletes everything under azkaban/projects, not just
# logs - confirm the contents are disposable before scheduling this script.
rm -rf -- "${data_root:?}"/azkaban/projects/*
crontab -e
# Run every Monday at 01:00. The file name must match the script created
# above (cleanLog.sh); this entry assumes it was saved in /root. It is started
# with bash to match the script's #!/bin/bash shebang.
00 01 * * 1 /bin/bash /root/cleanLog.sh
标签:log,CDH,rf,集群,var,rm,日志,data,out
From: https://www.cnblogs.com/atao-BigData/p/17775848.html