SQL语句
1、DDL(数据定义语言)
1、创建,删除数据库
-- 1、创建数据库
create database shujia;
-- 指定编码创建
create database shujia DEFAULT charset utf8;
-- 2、获取数据库列表
show databases;
-- 3、查看数据库语句
show create database shujia;
-- 4、删除数据库
drop database shujia;
-- 5、切换数据库
use bigdata;
-- 查看当前使用的数据库
select database();
2、创建表
-- 1、创建表
-- 主键是自带索引的(索引:通过索引字段查询数据速度快)
-- DEFAULT CHARSETz指定编码格式
-- ENGINE=InnoDB :使用的引擎
create table students(
`id` int UNSIGNED AUTO_INCREMENT, -- 自增主键
`name` varchar(5) NOT NULL,
`age` tinyint,
`sex` varchar(2) NOT NULL,
`clazz` varchar(10),
PRIMARY KEY (id) -- 主键,唯一一般使用学号,身份证号,作为主键
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- 加载本地数据到表中
LOAD DATA local INFILE '/root/student.txt' INTO TABLE students FIELDS TERMINATED BY ',' ;
-- 2、删除表
drop table students;
3、修改表
-- 增加字段
ALTER TABLE students ADD c int;
-- 修改字段
ALTER TABLE students MODIFY c varchar(22);
--删除字段
ALTER TABLE students drop c;
-- 表重命名
rename table students to student;
rename table student to students;
-- 清空表
truncate students;
2、DML(数据处理语言)
1、插入数据(insert)
-- 1、指定字段插入一条数据
insert into students(id,name,age,sex,clazz) values(1500100839,'明雁桃',22,'女','理科五班');
insert into students(id,name,sex) values(1500100832,'张三','男');
-- 2、所有字段插入数据
insert into students values(1500100851,'白运盛',22,'男','理科六班');
-- 3、插入多条数
insert into students values
(1500100852,'寇凝丹',24,'女','文科二班'),
(1500100853,'林鸿朗',24,'男','理科四班');
--4、替换插入,如果不存在就插入,如果存在就更新
-- 根据主键判断数据是否存在
replace into students values (1500100855,'寇凝丹',25,'女','文科二班');
2、修改数据(update)
-- 1、更新所有的数据
update students set clazz='文科一班';
-- 2、指定条件更新
update students set clazz='文科二班',age=21,sex='男' where name='张三';
3、删除数据(delete)
-- 1、删除一条数据
delete from students where id='1500100855';
--2、删除所有数据
delete from students;
3、DQL(数据查询语言)
1、select
-- 1、获取所有字段
select * from students;
-- 2、获取指定字段
select id,name from students;
-- 3、去除重复数据
select distinct clazz from students;
--4、在select中进行计算
-- as 取别名
select id,name,age+10 as age from students;
2、字符串函数
-- 1、获取长度
select length("shujia");
-- 2、拼接字符串
select concat(id,'_',name) from students;
-- 3、替换字符串
-- 下标从1开始
select insert('shujia',3,2,'1111');
-- 4、转大小写
select lower('ShuJia');
select upper('ShuJia');
-- 5、截取字符串
-- 左边截取
select left(clazz,2) from students;
-- 右边截取
select right(clazz,2) from students;
-- 指定下标截取,指定开始的位置和截取的长度,下标从1开始
select substring('shujia',4,3);
-- 6、去掉字符串前后的空格
select trim(' shujia ');
-- 7、替换
select replace('shujia','shu','数');
--8、反转
select reverse('shujia');
3、数值函数
-- 1、取绝对值
select abs(-100);
--2、求二次方根
select sqrt(100);
-- 3、求余数
select 100%3
-- 4、向上取整
select ceil(12.23);
-- 5、向下取整
select floor(12.22);
-- 6、四舍五入
select round(123.666);
-- 生成随机数
-- 0-1
select rand();
--0-100
select rand()*100;
--50-100
select rand()*50+50;
4、where
--1、等值条件
-- > < <= >= = <> (大于、小于、大于(小于)等于、不等于)
select * from
students
where sex='男' and age=21
;
-- 2、不等值条件
select * from students where sex != '男';
-- 3、区间判断
select * from students where age between 22 and 23;
select * from students where age >= 22 and age <= 23;
--4、in包含判断
select * from students where clazz in ('文科一班','文科二班','文科四班');
select * from students where clazz not in ('文科一班','文科二班','文科四班');
--5、模糊匹配
select * from students where name like '%桃';
--6、不为空
select * from students where name is not null;
-- and or not
select * from
students
where (age > 22 and sex='男') or clazz is not null
;
-- 在where中使用函数
select * from students where substring(clazz,1,2)='理科';
5、聚合函数
1、count
-- count(DEPTNO):如果里面的字段为null不计数不为null才计数
-- count(*) 所有字段都为null时才不计数
-- count(1) 每行都计数
--1、统计表的数据量
select count(*) from students;
--2、分组统计
-- 统计班级的人数
select clazz,count(*) as num
from students
group by clazz;
select sex,count(*) as num
from students
group by sex;
--统计每个班级男生的人数
-- 编写sql的顺序(from --> where ---> group by ---> select)
select clazz,count(id) as num
from students
where sex='男'
group by clazz;
2、sum
-- 统计学生的总分
create table scores(
id int,
cid int,
score double
);
-- 加载本地数据到表中
LOAD DATA local INFILE '/root/score.txt' INTO TABLE scores FIELDS TERMINATED BY ',' ;
-- group by: 将相同的数据分到一个组内进行处理
-- 1、组内求和
select
id,
sum(score) as sum_score
from
scores
group by id;
--2、全局求和
select sum(score) from scores;
3、avg
--1、分组使用
select
clazz,avg(age) as avg_age
from students
group by
clazz;
-- 2、全局使用
select
avg(age) as avg_age
from students;
4、max/min
--1、组内获取最大值
select
clazz,max(age) as max_age
from students
group by
clazz;
-- 2、计算全校最高分
-- 使用子查询
select
max(sum_score) as max_score
from
(
select
id,sum(score) as sum_score
from
scores
group by id
) as a;
6、时间函数
-- 1、获取当前时间
select *, current_timestamp() from students
select now();
--2、日期格式化函数
select date_format(current_timestamp(),'%Y/%m/%d %H/%i/%S');
-- 3、字符串转换成日期
select str_to_date('20240903','%Y%m%d');
--4、计算日期的差值
select datediff('2026-09-03','2025-09-03');
--5、增加天数
select DATE_ADD('2026-09-03',interval 100 day);
--6、时间戳()
select unix_timestamp();
--7、将时间转换成时间戳
select unix_timestamp('2024-09-03 09:52:53');
--8、将时间戳转换成时间
select from_unixtime(1),select from_unixtime(1);
-- 9、指定时间的格式
select from_unixtime(1,'%Y/%m%d');
--计算两个时间相差多个小时
select (unix_timestamp('2024-09-03 09:52:53') - unix_timestamp('2024-09-01 09:52:53'))/3600;
-- 2024-09-03 09:52:53 --> 2024/09/03 09/52/53
-- hive中时间格式,yyyy:MMd:dd HH:mm:ss
select from_unixtime(unix_timestamp('2024-09-03 09:52:53'),'%Y/%m/%d %H/%i/%S');
6、having
-- 取出总分大于500分的
-- sql执行顺序(from --> where --> group by ---> select ---> having)
-- 所以在where中不能使用select中新的字段
select
id,sum(score) as sum_score
from
scores
where
sum_score > 500 -- 写法不对
group by
id;
-- 使用子查询
SELECT
*
FROM
(
SELECT
id,
sum(score) AS sum_score
FROM
scores
GROUP BY
id
) AS a
WHERE
sum_score > 500;
--使用having,分组之后进行过滤
select
id,sum(score) as sum_score
from
scores
group by
id
having
sum(score) > 500;
7、limit
-- 获取前10条
select * from students limit 10;
-- 从第10条开始,获取10条
select * from students limit 10,10;
8、order by
-- 1、安装总分排序
-- asc:升序,desc降序
-- sql执行顺序(from --> where --> group by --> select --> having --> order by -- limit)
select
id,sum(score) as sum_score
from
scores
group by id
order by sum_score asc
limit 10;
9、if
select
id,
name,
age,
-- 当条件返回True时取前一个值,当条件返回False时,取后一个值
if(sex='男',1,2) as sex
from
students;
10、case when
select
id,name,age,
case
when sex='男' then 1
when sex='女' then 2
else 3 end as sex
from
students
11、union
-- union all:不会去重
select * from students where age > 23
union all
select * from students where age < 22;
-- union:会去重
select * from students where name ='贡涵亮'
union
select * from students where name ='贡涵亮'
-- 使用limit需要加括号
(select * from students limit 1)
union all
(select * from students limit 1)
12、笛卡尔积
-- 左表的每一条数据都会和右表的每一条数据进行链接,会导致数据膨胀,性能很差
select * from
students,
scores
-- 创建表
create table names(
id int,
name varchar(10)
);
-- 插入数据
insert into names values
(1,'张三'),
(2,'李四'),
(3,'王五');
create table ages(
id int,
age int
);
insert into ages values
(2,24),
(3,25),
(4,26);
select * from names,ages;
13、join
-- 1、left join
-- 左关联,以左表为基础,如果右表没有会填充null
select * from
names as a
left join
ages as b
on a.id=b.id;
-- 2、right join
select * from
names as a
right join
ages as b
on a.id=b.id;
--3、inner jion
-- 两边都需要有才能关联上
select * from
names as a
inner join
ages as b
on a.id=b.id;
-- 直接join就相当于时inner join
select * from
names as a
join
ages as b
on a.id=b.id;
-- 基于笛卡尔积过滤
select * from
names as a,
ages as b
where a.id=b.id;
14、将结果保存到表中
-- 1、直接将结果保存到新的表中
-- 基于查询语句自动创建表
create table t_sum_scre as
select id,sum(score) as sum_score
from
scores
group by id;
--2、先创建表,再将结果插入到新的表中
create table t_clazz_num(
clazz varchar(10),
num bigint
);
-- 将结果保存到表中
insert into t_clazz_num
select
clazz,
count(1) as num
from
students
group by
clazz;
15、视图
-- 视图:视图不保存数据,保存的是一段sql逻辑,当插入视图时才会去计算得到结果,
-- 原表变了,视图的结果也会跟着变
create view v_sum_scre as
select id,sum(score) as sum_score
from
scores
group by id;
-- 可以像查询普通表一样查询视图
select * from v_sum_scre;
-- 视图的作用:可以将一部分公共的逻辑封装到视图中,可以多次使用
create view student_scores as
select a.id,a.name,a.age,a.sex,a.clazz,b.cid,b.score
from
students as a
join
scores as b
on a.id=b.id;
-- 计算每个班级总分的平均分
select
clazz,
round(avg(sum_score),2) as avg_scre
from
(
select
id,
clazz,
sum(score) as sum_score
from
student_scores
group by -- 分组计算总分
id,clazz
) as a
group by -- 分组计算平均分
clazz;
16、查看sql执行计划
-- 执行计划
explain select
a.DEPTNO,a.DNAME,a.LOC,
count(b.DEPTNO) as num
from
dept as a
left join
emp as b
on a.DEPTNO=b.DEPTNO
group by a.DEPTNO,a.DNAME,a.LOC;
explain select a.*,if(b.num is null ,0,b.num) as num from
dept as a
left join
(
select DEPTNO,
count(1) as num from
emp
group by DEPTNO
) as b
on a.DEPTNO=b.DEPTNO;
4、DCL(数据控制语言)
-- 1、创建用户
create user 'shujia'@'%' identified WITH mysql_native_password by '123456';
-- 刷新权限
flush privileges;
-- 2、修改用户
rename user 'shujia'@'%' to 'bigdata'@'%';
-- 查看权限
select user,host,authentication_string from mysql.user;
--3、修改密码
UPDATE mysql.user SET authentication_string=password('123456') WHERE user='bigdata' AND host='%';
-- 直接修改密码
SET PASSWORD=password('123456');
-- 4、删除用户
drop user 'bigdata'@'%';
--5、授予某个用户所有表所有库的权限
grant all privileges on *.* to 'shujia'@'%';
-- 回收权限
revoke all on *.* from 'shujia'@'%';
-- 授予某个用户某个表的查询权限
-- SELECT, INSERT, UPDATE, DELETE
grant SELECT on bigdata.students to 'shujia'@'%';
grant INSERT, UPDATE, DELETE on bigdata.students to 'shujia'@'%';
-- 查看当前用户权限
show grants for 'shujia'@'%';
5、索引
1、创建大表
-- 创建表
create table cars(
card varchar(10) comment '车牌号',
city varchar(10) comment '城市编号',
county varchar(10) comment '区县编号',
carid varchar(20) comment '卡口编号',
mid varchar(10) comment '摄像头编号',
fx varchar(10) comment '方向',
road varchar(10) comment '道路编号',
ts bigint comment '时间',
speed double comment '速度'
);
LOAD DATA local INFILE '/root/cars_sample.txt' INTO TABLE cars FIELDS TERMINATED BY ',' ;
-- 查询表
select * from cars where card='皖AAF8Y2';
--统计每个城市的车流量
select city,count(1) as flow
from cars group by city;
2、普通索引
索引的本质是空间换时间,
更新数据表索引表也需要同步更新,会影响增删改的效率压缩(cpu计算时间换硬盘的存储空间)
-- 1、创建索引
CREATE INDEX city_index ON cars (city);
select city,count(city) as flow
from cars group by city;
CREATE INDEX county_index ON cars (county);
-- 2、删除索引
drop index city_index on cars;
CREATE INDEX card_index ON cars (card);
-- 如果不创建索引,会进行全表扫描
--创建索引后,查询数据时会先查询索引表,再查询数据表,避免全表扫描
select * from cars where card='皖AAF8Y2';
drop index card_index on cars;
-- 创建多字段索引
CREATE INDEX city_card_index ON cars (city,card);
select city,count(distinct card) as flow
from cars group by city;
3、唯一索引
-- 创建唯一索引
CREATE UNIQUE INDEX id_index ON students (id);
4、主键索引
create table students(
`id` int UNSIGNED AUTO_INCREMENT, -- 自增主键
`name` varchar(5) NOT NULL,
`age` tinyint,
`sex` varchar(2) NOT NULL,
`clazz` varchar(10),
PRIMARY KEY (id) -- 主键,唯一一般使用学号,身份证号,作为主键
);
6、事务
create table users(
id BIGINT,
username VARCHAR(20),
ye DECIMAL(10.0)
);
insert into users values(100,'张三',10000);
insert into users values(200,'李四',2000);
--1、开启事务
begin;
-- 2、执行多条sql
-- -- 开启事务之后修改数据会对表增加锁,再其它事务中不能对这个表进行修改的操作
update users set ye=ye-100 where username='张三';
update users set ye=ye+100 where username='李四';
--3、提交事务
commit;
--回滚事务
ROLLBACK;
-- 查看进程
SHOW PROCESSLIST;-
-- 杀掉进程
kill [id]
-- 禁止自动提交
SET AUTOCOMMIT=0;
-- 开启自动提交
SET AUTOCOMMIT=1
6、在shell使用使用sql
# -e :执行sql
students=`mysql -uroot -p123456 -e"select * from bigdata.students"`
echo $students
# 编写sql脚本,统计班级的人数,将结果导出到文件
mysql -uroot -p123456 -e"select clazz,count(1) from bigdata.students group by clazz" > clazz_num.txt
for clazz in `cat student.txt | awk -F',' '{print $5}' | sort -u`
do
mysql -uroot -p123456 -e"select * from bigdata.students where clazz='${clazz}'" > ${clazz}.txt
done
4、练习
1、统计每个班级每个年龄的学生人数
select
clazz,
age,
count(1) as num
from
students
group by
clazz,
age;
2、统计每个班级每个年龄的学生人数,统计性别为男的学生人数
select clazz,age,count(1) as num
from
students
where sex='男'
group by clazz,age;
3、统计每个班级每个年龄的学生人数,统计性别为男的学生人数,取出人数大于10的
select clazz,age,count(1) as num
from
students
where sex='男'
group by clazz,age
having num > 10;
3、统计每个班级每个年龄的学生人数,统计性别为男的学生人数,取出人数大于10的,人数按照降序排序,取前两个
select clazz,age,count(1) as num
from
students
where sex='男'
group by clazz,age
having num > 10
order by num desc
limit 2;
4、计算偏科最严重的学生
-- 不考虑总分的影响
select
id,std(score) as score_std
from
scores
group by
id
order by
score_std desc
limit
10;
CREATE table subject(
cid int,
cname varchar(4),
max_score double
);
LOAD DATA local INFILE '/root/subject.txt' INTO TABLE subject FIELDS TERMINATED BY ',' ;
--1、关联科目表获取科目总分, 对分数进行归一化
SELECT
e.id,
e.name,
e.clazz,
d.score
FROM
(
SELECT
a.id,
std(a.score / b.max_score) AS std_score
FROM
scores AS a
JOIN subject AS b ON a.cid = b.cid
GROUP BY
a.id
ORDER BY
std_score DESC
LIMIT 10
) AS c
JOIN scores AS d ON c.id = d.id
JOIN students AS e ON c.id = e.id;
-- 使用in实现
SELECT
b.id,b.name,b.clazz,a.score
FROM
scores as a
join students as b
on a.id=b.id
WHERE
a.id IN ( -- 在in中使用子查询,子查询只能有一个字段
SELECT
id
FROM
(
SELECT
a.id,
std(a.score / b.max_score) AS std_score
FROM
scores AS a
JOIN subject AS b ON a.cid = b.cid
GROUP BY
a.id
ORDER BY
std_score DESC
LIMIT 10
) AS a
);
5、统计文理科分别多少人
-- 直接在分组时处理数据
select
substring(clazz,1,2) as t,
count(1) as num
from
students
group by
substring(clazz,1,2)
--使用子查询
select
t,count(1) as num
from(
select substring(clazz,1,2) as t from students
)as a
group by t;
6、统计学生的总分,输出,姓名和总分
-- 1、先计算总分,再关联学生表
SELECT
b.name,
a.sum_score
FROM
(
SELECT
id,
sum(score) AS sum_score
FROM
scores
GROUP BY
id
) AS a
JOIN students AS b ON a.id = b.id;
--2、先关联学生表再计算总分
-- sql执行顺序(from ---> join --> on ---> where ---> group by ---> select --> having ---> order by ---> limit)
SELECT
NAME,
sum(score) AS sum_score
FROM
students AS a
JOIN scores AS b ON a.id = b.id
GROUP BY
NAME;
标签:语句,--,clazz,students,score,SQL,id,select
From: https://www.cnblogs.com/lsb021214/p/18411057