02-常用技巧
数据库操作
查询所有数据库名和表名
-- 查询表名
select table_name
from information_schema.tables
where table_schema='database_name' and table_type='base table';
-- 查询字段名
select column_name
from information_schema.columns
where table_schema='database_name' and table_name='table_name';
查看MySQL版本
select version()
from dual;
用MySQL当计算器
select 1 + 1
from dual;
数据处理
随机抽样
-- 随机抽样1000行
select *
from tablename
order by rand()
limit 1000;
数据清洗
-- 缺失值填充0
ifnull(column_name, 0)
coalesce(column_name, 0)
-- md5加密
md5(column_name)
-- 字符转为数值
column_name*1
cast(column_name as decimal)
convert(column_name, decimal)
-- 保留两位小数,四舍五入
round(column_name, 2)
-- 字符串合并
concat(id, '-', name)
concat_ws('-', id, name)
-- 字符替换
replace(column_name, '-', '')
-- 截取字符串中第1-第10位
substr(column_name, 1, 10)
-- 计算逗号隔开的单词数
length(type)-length(replace(type,',',''))+1
-- 根据json数据:{'id': 'ZrPxoo0WN', 'score': 680, 'grade': 'A'},提取grade的值
convert(
substr(report,
locate('\'grade\': \'',report) + length('\'grade\': \''),
locate('\'}',report) - locate('\'grade\': \'',report) - length('\'grade\': \'')
)
using utf8
) grade
-- mysql版本5.7及以上可应用json_extract()函数直接取值
json_extract('{"grade": "A"}', "$.grade")
-- like和or的替换方法
,case when job like "%xxx%" or job like "%yyy%" then 1 else 0 end as job_type
,case when job regexp "xxx|yyy" then 1 else 0 end as job_type
字符串组合-合并
-
union_score:根据id汇总所有score,以逗号分隔
-
last_score:根据id汇总所有score,根据日期取最新的一个值
select
id
,group_concat(score) as union_score
,substring_index(group_concat(score order by create_time desc),',',1) as last_score
from tablename
group by id;
字符串组合-拆分
- 同一行中,存储多个id,且以逗号分隔;
- 将id拆分为多行并去重。
-- 方法1:借用mysql.help_topic
select
distinct substring_index(substring_index(id,',', b.help_topic_id+1),',',-1) union_id
from
(
select id
from tablename
) a
join mysql.help_topic b
on b.help_topic_id < (length(a.id)-length(replace(a.id,',','')) + 1);
-- 方法2:生成整数列
select
distinct substring_index(substring_index(id,',', b.int_value+1),',',-1) union_id
from
(
select id
from tablename
) a
join
(
select x1.n + x10.n*10 + x100.n*100 + x1000.n*1000 as int_value
from
(select 0 as n union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) x1,
(select 0 as n union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) x10,
(select 0 as n union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) x100,
(select 0 as n union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) x1000
where x1.n + x10.n*10 + x100.n*100 + x1000.n*1000 <= 10000
) b
on b.int_value < (length(a.id)-length(replace(a.id,',','')) + 1);
计算累加
-- 方法1
set @csum := 0;
select
date
,cnt
,(@csum := @csum + cnt) cum_cnt
from
(
select
date(create_time) date
,count(id) cnt
from tablename
where date(create_time) between '2021-01-01' and '2021-02-01'
group by 1
) a;
-- 方法2
select
t1.date
,t1.cnt
,sum(t2.cnt) cum_cnt
from
(
select
date(create_time) date
,count(id) cnt
from tablename
where date(create_time) between '2021-01-01' and '2021-02-01'
group by 1
) t1
join
(
select
date(create_time) date
,count(id) cnt
from tablename
where date(create_time) between '2021-01-01' and '2021-02-01'
group by 1
) t2
on t1.date >= t2.date
group by 1;
-- 方法3
select type1
,type2
,count(*)
,sum(count(*)) over (partition by type1 order by type2)
from tablename
group by 1,2
order by 1,2;
计算移动平均
-- 近3日数量的移动平均值
select
a.date
,avg(b.cnt) avg_cnt
from
(
select
date(create_time) date
,count(distinct id) cnt
from tablename
group by 1
) a
left join
(
select
date(create_time) date
,count(distinct id) cnt
from tablename
group by 1
) b
on datediff(a.date, b.date) between 0 and 2
group by a.date
order by a.date asc;
窗口函数
- MySQL8.0开始支持窗口函数。通过窗口函数,能以更简单的方式解决查询问题,并具有更好的性能。
聚合函数
- 通过over()函数,将聚合结果显示在每条单独的记录中
select
month
,avg(amount) over() as avg_amt
from tablename;
-- 等价于
select
month
,(select avg(amount) from tablename) as avg_amt
from tablename;
- partition by与order by子句
-- 按月计算平均
select
month
,avg(amount) over(partition by month) as avg_amt
from tablename;
-- 根据日期,计算一个月内逐天的累计平均
select
month
,avg(amount) over(partition by month order by date) as avg_amt
from tablename;
- 分组求占比
select type1
,type2
,count(*)
,count(*) / sum(count(*)) over(partition by type1)
from tablename
group by 1,2
order by 1,2;
-- 单组求占比
select type1
,count(*)
,count(*) / sum(count(*)) over(partition by 1)
from tablename
group by 1
order by 1;
序列函数
- ntile()函数:对表进行切片分组;
-- 按行分组
select
name
,ntile(5) over() as bin
from tablename;
-- 根据组内记录切片
select
name
,ntile(5) over(partition by shop) as bin
from tablename;
-- 根据组内记录排序后切片
select
name
,ntile(5) over(partition by shop order by date) as bin
from tablename;
- row_number()函数:生成每一条记录对应的序号;
select
name
,row_number() over(order by sales) as rank
from tablename;
select
name
,row_number() over(partition by month order by sales) as rank
from tablename;
- lag()和lead()函数:向后和向前移动;
select
name
,month
,sales
,lag(sales,1) over(partition by shopname order by month) as lag_sales -- 上个月销量
,lead(sales,1) over(partition by shopname order by month) as lead_sales -- 下个月销量
from tablename;
- first_value()和last_value():取第一个和最后一个值。
select
name
,month
,sales
,first_value(sales,1) over(partition by shopname order by month) as first_sales -- 第一笔销量
,last_value(sales,1) over(partition by shopname order by month) as last_sales -- 最后一笔销量
from tablename;
日期与时间
获取日期和时间
-- 当前日期与时间
now()
-- 当前日期
curdate()
current_date()
date(now())
-- 当前日期对应的年、季度、月份、周数、周几、天数
year(now())
quarter(now())
month(now())
weekofyear(now())
dayofweek(now())
day(now())
-- 当前时间
curtime()
current_time()
time(now)
-- 当前时间对应的小时、分钟、秒
hour(now())
minute(now())
second(now())
生成日期
select *
from
(
select adddate('1970-01-01', t4*10000 + t3*1000 + t2*100 + t1*10 + t0) `统计日期`
from
(select 0 t0 union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t0,
(select 0 t1 union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t1,
(select 0 t2 union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t2,
(select 0 t3 union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t3,
(select 0 t4 union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t4
) v
where `统计日期` between '2021-01-01' and now();
日期和时间格式转换
-- 提取日期
date_format('2021-01-21 12:21:21', '%Y-%m-%d')
substr('2021-01-21 12:21:21', 1, 10)
-- 提取月份
date_format('2021-01-21 12:21:21', '%Y-%m')
substr('2021-01-21 12:21:21', 1, 7)
-- 提取周数(跨年的周统计: %X要和%V一起使用,%Y要和%U、%m一起使用)
date_format('2021-01-21 12:21:21', '%x第%v周')
-- 时间与时间戳转换
unix_timestamp('2021-01-21 12:21:21')
from_unixtime(1611202881, '%Y-%m-%d %H:%i:%s')
- date_format()函数详细参数如下:
格式 | 描述 |
---|---|
%a | 缩写星期名 |
%b | 缩写月名 |
%c | 月,数值 |
%D | 带有英文前缀的月中的天 |
%d | 月的天,数值(00-31) |
%e | 月的天,数值(0-31) |
%f | 微秒 |
%H | 小时 (00-23) |
%h | 小时 (01-12) |
%I | 小时 (01-12) |
%i | 分钟,数值(00-59) |
%j | 年的天 (001-366) |
%k | 小时 (0-23) |
%l | 小时 (1-12) |
%M | 月名 |
%m | 月,数值(00-12) |
%p | AM 或 PM |
%r | 时间,12-小时(hh:mm:ss AM 或 PM) |
%S | 秒(00-59) |
%s | 秒(00-59) |
%T | 时间, 24-小时 (hh:mm:ss) |
%U | 周 (00-53) 星期日是一周的第一天 |
%u | 周 (00-53) 星期一是一周的第一天 |
%V | 周 (01-53) 星期日是一周的第一天,与 %X 使用 |
%v | 周 (01-53) 星期一是一周的第一天,与 %x 使用 |
%W | 星期名 |
%w | 周的天 (0=星期日, 6=星期六) |
%X | 年,其中的星期日是周的第一天,4 位,与 %V 使用 |
%x | 年,其中的星期一是周的第一天,4 位,与 %v 使用 |
%Y | 年,4 位 |
%y | 年,2 位 |
日期和时间的偏移
-- 昨天
date_sub(curdate(), interval 1 day)
-- 明天
date_add(curdate(), interval 1 day)
-- 上个月的今天
date_sub(curdate(), interval 1 month)
-- 下个月的今天
date_add(curdate(), interval 1 month)
-- 去年的今天
date_sub(curdate(), interval 1 year)
-- 明年的今天
date_add(curdate(), interval 1 year)
-- 本周周一
date_sub(curdate(), interval weekday(curdate()) + 0 day)
-- 上周周一
date_sub(curdate(), interval weekday(curdate()) + 7 day)
-- 上周周日
date_sub(curdate(), interval weekday(curdate()) + 1 day)
-- 本月第一天
date_add(curdate() - day(curdate()) + 1, interval 0 month)
date_add(curdate(), interval - day(curdate()) + 1 day)
-- 本月最后一天
last_day(curdate())
-- 上个月最后一天
last_day(date_sub(curdate(), interval 1 month))
日期和时间的计算
-- 两个日期之间的计算:datediff(`结束日期`, `开始日期`)
datediff('2022-02-22', '2021-01-21')
-- 其他时间维度的计算
timestampdiff(second, '2021-01-21 12:21:21', '2021-01-21 12:21:31')
timestampadd(year, 21, '2021-01-21 12:21:21')
timestampdiff和timestampadd详细参数如下:
参数 | 含义 |
---|---|
second | 秒 |
mintue | 分钟 |
hour | 小时 |
day | 天 |
week | 星期 |
month | 月 |
quarter | 季度 |
year | 年 |