【mysql】 一次索引优化实验

一次索引优化实验

近期由于工作需要,对索引实际使用又进行了一次思考

索引实验

参考相关:

存储过程参考: https://blog.csdn.net/CrayonShinChaner/article/details/121820454
时间字段加索引: https://blog.csdn.net/qq_33934427/article/details/105078236
数据量查询得到的数据只占总数据量的5% 索引失效 :https://blog.csdn.net/CSDNcircular/article/details/107253747

环境

Arch Linux x86_64
CPU: Intel i7-6700K (4) @ 4.007GHz
Memory: 653MiB / 3888MiB

SQL准备

靶场表

1
2
3
4
5
6
7
8
9
10
11
12
CREATE TABLE `test2` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`send_time` datetime DEFAULT NULL,
`event_type` varchar(255) DEFAULT NULL,
`event_name` varchar(255) DEFAULT NULL,
`status` varchar(255) DEFAULT NULL,
`file_name` varchar(255) DEFAULT NULL,
`model_code` varchar(255) DEFAULT NULL,
`user_id` varchar(255) DEFAULT NULL,
`card_id` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

快速插入数据存储过程

1
2
3
4
5
6
7
8
9
10
11
-- 无参创建
CREATE PROCEDURE demo1()
BEGIN
DECLARE i INT DEFAULT 0;
START TRANSACTION;
WHILE i<1000000 DO
INSERT INTO test1(send_time,event_type,event_name,status,model_code,user_id,card_id,file_name) VALUES(from_unixtime(1541302365+FLOOR(rand()*154130236),"%Y-%m-%d %H:%i:%s"),randStr(20),randStr(20),randNum(1),randNum(2),UUID(),UUID(),UUID());
SET i=i+1;
END WHILE;
COMMIT;
END

随机生成n个随机数字

1
2
3
4
5
6
7
8
9
10
11
12
13
DELIMITER $$
CREATE FUNCTION randNum(n int) RETURNS VARCHAR(255)
BEGIN
DECLARE chars_str varchar(20) DEFAULT '0123456789';
DECLARE return_str varchar(255) DEFAULT '';
DECLARE i INT DEFAULT 0;
WHILE i < n DO
SET return_str = concat(return_str,substring(chars_str , FLOOR(1 + RAND()*10 ),1));
SET i = i +1;
END WHILE;
RETURN return_str;
END $$
DELIMITER;

随机生成手机号函数

1
2
3
4
5
6
7
8
9
10
11
12
13
DELIMITER $$
CREATE FUNCTION generatePhone() RETURNS varchar(20)
BEGIN
DECLARE head char(3);
DECLARE phone varchar(20);
DECLARE bodys varchar(100) default "130 131 132 133 134 135 136 137 138 139 186 187 189 151 157";
DECLARE starts int;
SET starts = 1+floor(rand()*15)*4;
SET head = trim(substring(bodys,starts,3));
SET phone = trim(concat(head,randNum(8)));
RETURN phone;
END $$
DELIMITER ;

随机生成用户名函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
DELIMITER $$
CREATE FUNCTION `randStr`(n INT) RETURNS varchar(255) CHARSET utf8mb4
DETERMINISTIC
BEGIN
DECLARE chars_str varchar(100) DEFAULT 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
DECLARE return_str varchar(255) DEFAULT '' ;
DECLARE i INT DEFAULT 0;
WHILE i < n DO
SET return_str = concat(return_str, substring(chars_str, FLOOR(1 + RAND() * 62), 1));
SET i = i + 1;
END WHILE;
RETURN return_str;
END$$
DELIMITER;

实验过程

未加索引
100W数据大小: 158.69MB

1
2
3
4
5
6
7
8
9
-- cos: 0.574S ,total: 40939
SELECT * from test2 t where t.send_time BETWEEN '2020-01-17' AND '2020-03-30'
-- cos: 0.474S ,total: 9974
SELECT * from test2 t where t.model_code=33;
-- cos: 0.460 , total:1611
SELECT * from test2 t WHERE t.event_type = 303;
-- cos: 0.695 , total:100040
SELECT * from test2 t WHERE t.`status` = 5;

添加索引
100W数据大小: 158.69MB + 81.67MB

1
2
3
4
5
6
7
8
9
-- cos: 0.427S ,total: 353230  时间字段加索引主要是看范围内数据量的大小,目前看区别不大
SELECT * from test2 t where t.send_time BETWEEN '2020-01-17' AND '2020-03-30'
-- cos: 0.457S ,total: 9974 数据量查询得到的数据只占总数据量的5% 索引失效
SELECT * from test2 t where t.model_code=33;
-- cos: 0.432 , total:1611
SELECT * from test2 t WHERE t.event_type = 303;
-- cos: 0.642 , total:100040
SELECT * from test2 t WHERE t.`status` = 5;

结论

索引不是万金油,不是给字段加索引就行。需要考虑真正的业务场景

尽量对重复度低的列创建索引,如:外键值

当走索引查询的数据量占比超过总量的5%-10%的时候,查询优化器会判断直接走扫表