clickhouse 21.10
Feature : UDF
- 用户可通过添加lambda表达式,创建自定义Function
CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;
SELECT number, linear_equation(number, 2, 1) FROM numbers(3);
CREATE FUNCTION parity_str AS (n) -> if(n % 2, 'odd', 'even');
SELECT number, parity_str(number) FROM numbers(3);
Feature : intersect
- 求多个数据集在某一维度上的交集,适合在用户分群等类似业务场景使用
select count( 1) from (
select id as create_user from app.user_model where 1=1 and product_count>=10 and product_count<=300
intersect
select create_user from app.work_basic_model where total_uv>=100 and total_uv<=350
intersect
select create_user from app.work_basic_model where total_uv>=200 and total_uv<=350
)
类似:
with
(select groupUniqArray(u_i) from (select id as u_i from (select *from app.user_model where 1=1 and product_count>=10 and product_count<=300 ) )) as u0,
(select groupUniqArray(create_user) from (select create_user from (select * from app.work_basic_model where total_uv>=100 and total_uv<=350 ) )) as u1 ,
(select groupUniqArray(create_user) from (select create_user from (select * from app.work_basic_model where total_uv>=200 and total_uv<=350 ) )) as u2
select length(arrayIntersect(u0,u1,u2)) as u
Feature : except
用第一个查询子集与后面所有子集求差集
select arrayJoin([1,2,3,4])
except
select arrayJoin([1,2])
except
select arrayJoin([4,5])
Feature : leftPad/rightPad(字符串左/右补齐)
- 可用于对某些敏感信息进行脱敏处理
SELECT
leftPad(substring(phone,-3,3), length( phone ), '*')
from (select '13126966152' phone)
Feature : splitByRegexp
- 按照正则表达式对文本进行分割,分割后返回一个数组
-- 提取html中的所有去标签后的文本信息
select
splitByRegexp('<[^<>]*>', x)
from (select arrayJoin(['<h1>hello<h2>world</h2></h1>', 'gbye<split>bug']) x)
Feature : mapContains/mapKeys/mapValues
- 新增map数据类型相关处理函数
select map( 'aa', 4, 'bb' , 5) as m, mapContains(m, 'aa'),mapContains(m, 'cc'), mapKeys(m), mapValues(m)
Feature : countMatches
- 基于正则表达式统计匹配数
select countMatches('foo.com bar.com baz.com bam.com', '([^. ]+)\.([^. ]+)')
Feature : accurateCastOrNull
- 对字段值进行类型转换校验,转换成功返回转换后的类型数据,否则Null
SELECT accurateCastOrNull(2, 'Int8'), accurateCastOrNull('ss', 'Int8')
Feature : countSubstrings/countSubstringsCaseInsensitive
- 计算某个字符串中包含特定字符的数量
select countSubstrings('com.foo.com.bar.com', 'com') ,countSubstringsCaseInsensitive('BaBaB', 'A')