Skip to the content.

02-常用技巧


数据库操作

查询所有数据库名和表名

-- 查询表名
select table_name
from information_schema.tables
where table_schema='database_name' and table_type='base table';

-- 查询字段名
select column_name
from information_schema.columns
where table_schema='database_name' and table_name='table_name';


查看MySQL版本

select version() 
from dual;


用MySQL当计算器

select 1 + 1
from dual;


数据处理

随机抽样

-- 随机抽样1000行
select *
from tablename
order by rand()
limit 1000;

数据清洗

-- 缺失值填充0
ifnull(column_name, 0)
coalesce(column_name, 0)

-- md5加密
md5(column_name)

-- 字符转为数值
column_name*1
cast(column_name as decimal)
convert(column_name, decimal)

-- 保留两位小数,四舍五入
round(column_name, 2)

-- 字符串合并
concat(id, '-', name)
concat_ws('-', id, name)

-- 字符替换
replace(column_name, '-', '')

-- 截取字符串中第1-第10位
substr(column_name, 1, 10)

-- 计算逗号隔开的单词数
length(type)-length(replace(type,',',''))+1

-- 根据json数据:{'id': 'ZrPxoo0WN', 'score': 680, 'grade': 'A'},提取grade的值
convert(
         substr(report,
                locate('\'grade\': \'',report) + length('\'grade\': \''), 
                locate('\'}',report) - locate('\'grade\': \'',report) - length('\'grade\': \'')
               )
         using utf8
       ) grade

-- mysql版本5.7及以上可应用json_extract()函数直接取值
json_extract('{"grade": "A"}', "$.grade")

-- like和or的替换方法
,case when job like "%xxx%" or job like "%yyy%" then 1 else 0 end as job_type
,case when job regexp "xxx|yyy" then 1 else 0 end as job_type

字符串组合-合并

select
 id
,group_concat(score) as union_score
,substring_index(group_concat(score order by create_time desc),',',1) as last_score
from tablename
group by id;

字符串组合-拆分

-- 方法1:借用mysql.help_topic
select 
distinct substring_index(substring_index(id,',', b.help_topic_id+1),',',-1) union_id
from 
    (
    select id
    from tablename
    ) a
join mysql.help_topic b
on b.help_topic_id < (length(a.id)-length(replace(a.id,',','')) + 1);

-- 方法2:生成整数列
select 
distinct substring_index(substring_index(id,',', b.int_value+1),',',-1) union_id
from 
    (
    select id
    from tablename
    ) a
join 
    (
    select x1.n + x10.n*10 + x100.n*100 + x1000.n*1000 as int_value
    from 
    	(select 0 as n union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) x1,
    	(select 0 as n union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) x10,
    	(select 0 as n union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) x100,
    	(select 0 as n union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) x1000
    where x1.n + x10.n*10 + x100.n*100 + x1000.n*1000 <= 10000
    ) b
on b.int_value < (length(a.id)-length(replace(a.id,',','')) + 1);

计算累加

-- 方法1
set @csum := 0;
select 
 date
,cnt
,(@csum := @csum + cnt) cum_cnt
from 
    (
    select 
     date(create_time) date
    ,count(id) cnt
    from tablename
    where date(create_time) between '2021-01-01' and '2021-02-01'
    group by 1
    ) a;

-- 方法2
select 
 t1.date
,t1.cnt
,sum(t2.cnt) cum_cnt
from 
    (
    select 
     date(create_time) date
    ,count(id) cnt
    from tablename
    where date(create_time) between '2021-01-01' and '2021-02-01'
    group by 1
    ) t1
join
    (
    select 
     date(create_time) date
    ,count(id) cnt
    from tablename
    where date(create_time) between '2021-01-01' and '2021-02-01'
    group by 1
    ) t2
on t1.date >= t2.date
group by 1;

-- 方法3
select type1
,type2
,count(*)
,sum(count(*)) over (partition by type1 order by type2) 
from tablename
group by 1,2 
order by 1,2;

计算移动平均

-- 近3日数量的移动平均值
select  
 a.date
,avg(b.cnt) avg_cnt
from 
    (
    select 
     date(create_time) date
    ,count(distinct id) cnt
    from tablename
    group by 1
    ) a
left join 
    (
    select 
     date(create_time) date
    ,count(distinct id) cnt
    from tablename
    group by 1
    ) b
on datediff(a.date, b.date) between 0 and 2
group by a.date
order by a.date asc;



窗口函数

聚合函数

select
 month
,avg(amount) over() as avg_amt
from tablename;

-- 等价于
select
 month
,(select avg(amount) from tablename) as avg_amt
from tablename;
-- 按月计算平均
select
 month
,avg(amount) over(partition by month) as avg_amt
from tablename;

-- 根据日期,计算一个月内逐天的累计平均
select
 month
,avg(amount) over(partition by month order by date) as avg_amt
from tablename;
select type1
,type2
,count(*)
,count(*) / sum(count(*)) over(partition by type1) 
from tablename
group by 1,2 
order by 1,2;

-- 单组求占比
select type1
,count(*)
,count(*) / sum(count(*)) over(partition by 1) 
from tablename
group by 1
order by 1;

序列函数

-- 按行分组
select 
 name
,ntile(5) over() as bin
from tablename;

-- 根据组内记录切片
select 
 name
,ntile(5) over(partition by shop) as bin
from tablename;

-- 根据组内记录排序后切片
select 
 name
,ntile(5) over(partition by shop order by date) as bin
from tablename;
select 
 name
,row_number() over(order by sales) as rank
from tablename;

select 
 name
,row_number() over(partition by month order by sales) as rank
from tablename;
select
 name
,month
,sales
,lag(sales,1) over(partition by shopname order by month) as lag_sales -- 上个月销量
,lead(sales,1) over(partition by shopname order by month) as lead_sales -- 下个月销量
from tablename;
select
 name
,month
,sales
,first_value(sales,1) over(partition by shopname order by month) as first_sales -- 第一笔销量
,last_value(sales,1) over(partition by shopname order by month) as last_sales -- 最后一笔销量
from tablename;



日期与时间

获取日期和时间

-- 当前日期与时间
now()

-- 当前日期
curdate()
current_date()
date(now())

-- 当前日期对应的年、季度、月份、周数、周几、天数
year(now())
quarter(now())
month(now())
weekofyear(now())
dayofweek(now())
day(now())

-- 当前时间
curtime()
current_time()
time(now)

-- 当前时间对应的小时、分钟、秒
hour(now())
minute(now())
second(now())

生成日期

select *
from 
    (
    select adddate('1970-01-01', t4*10000 + t3*1000 + t2*100 + t1*10 + t0) `统计日期`
    from 
        (select 0 t0 union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t0, 
        (select 0 t1 union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t1, 
        (select 0 t2 union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t2, 
        (select 0 t3 union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t3, 
        (select 0 t4 union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t4
    ) v 
where `统计日期` between '2021-01-01' and now();

日期和时间格式转换

-- 提取日期
date_format('2021-01-21 12:21:21', '%Y-%m-%d')
substr('2021-01-21 12:21:21', 1, 10)

-- 提取月份
date_format('2021-01-21 12:21:21', '%Y-%m')
substr('2021-01-21 12:21:21', 1, 7)

-- 提取周数(跨年的周统计: %X要和%V一起使用,%Y要和%U、%m一起使用)
date_format('2021-01-21 12:21:21', '%x第%v周')

-- 时间与时间戳转换
unix_timestamp('2021-01-21 12:21:21')
from_unixtime(1611202881, '%Y-%m-%d %H:%i:%s')
格式 描述
%a 缩写星期名
%b 缩写月名
%c 月,数值
%D 带有英文前缀的月中的天
%d 月的天,数值(00-31)
%e 月的天,数值(0-31)
%f 微秒
%H 小时 (00-23)
%h 小时 (01-12)
%I 小时 (01-12)
%i 分钟,数值(00-59)
%j 年的天 (001-366)
%k 小时 (0-23)
%l 小时 (1-12)
%M 月名
%m 月,数值(00-12)
%p AM 或 PM
%r 时间,12-小时(hh:mm:ss AM 或 PM)
%S 秒(00-59)
%s 秒(00-59)
%T 时间, 24-小时 (hh:mm:ss)
%U 周 (00-53) 星期日是一周的第一天
%u 周 (00-53) 星期一是一周的第一天
%V 周 (01-53) 星期日是一周的第一天,与 %X 使用
%v 周 (01-53) 星期一是一周的第一天,与 %x 使用
%W 星期名
%w 周的天 (0=星期日, 6=星期六)
%X 年,其中的星期日是周的第一天,4 位,与 %V 使用
%x 年,其中的星期一是周的第一天,4 位,与 %v 使用
%Y 年,4 位
%y 年,2 位

日期和时间的偏移

-- 昨天
date_sub(curdate(), interval 1 day)
-- 明天
date_add(curdate(), interval 1 day)

-- 上个月的今天
date_sub(curdate(), interval 1 month)
-- 下个月的今天
date_add(curdate(), interval 1 month) 

-- 去年的今天
date_sub(curdate(), interval 1 year)
-- 明年的今天
date_add(curdate(), interval 1 year) 

-- 本周周一
date_sub(curdate(), interval weekday(curdate()) + 0 day)
-- 上周周一
date_sub(curdate(), interval weekday(curdate()) + 7 day)
-- 上周周日
date_sub(curdate(), interval weekday(curdate()) + 1 day)

-- 本月第一天
date_add(curdate() - day(curdate()) + 1, interval 0 month)
date_add(curdate(), interval - day(curdate()) + 1 day)
-- 本月最后一天
last_day(curdate())
-- 上个月最后一天
last_day(date_sub(curdate(), interval 1 month))

日期和时间的计算

-- 两个日期之间的计算:datediff(`结束日期`, `开始日期`)
datediff('2022-02-22', '2021-01-21')

-- 其他时间维度的计算
timestampdiff(second, '2021-01-21 12:21:21', '2021-01-21 12:21:31')
timestampadd(year, 21, '2021-01-21 12:21:21')

timestampdiff和timestampadd详细参数如下:

参数 含义
second
mintue 分钟
hour 小时
day
week 星期
month
quarter 季度
year