解析SQL中的底表
主要目标是获取出StarRocks雾化中的底表和字段备注,之后给字段赋予备注值,存入库表,可以动态生成数据字典,web可以利用该表
实现mybatis的动态sql拼接,动态化的excel导出导入,魔板等功能。尝试使用了Jsqlparser解析sql语句,发现遇到部分
复杂的子查询内包含union all情况解析非常慢,解析完成20多个视图需要至少3min不能满足集成到web的需求,尝试了Calcite
这块有待于进一步研究,解析中构造语法树ast就出现了问题,需要细细研究javacc编译器等,复杂度上升,直接利用正则表达式来搞定。
calcite解析中遇到的问题
一种是case when (left() = 'bbb') then 无法识别(left
另外一种是 interval ( 7 day) 无法解析 interval (
如果单纯的避免关键字,采用反引号注释掉,后面还有个 7 day,无法解析。
效率较Jsqlparser高很多
代码中部分是无用的,使用时候看一下吧
import net.sf.jsqlparser.expression.Alias;
import net.sf.jsqlparser.statement.select.PlainSelect;
import net.sf.jsqlparser.statement.select.Select;
import net.sf.jsqlparser.statement.select.SelectExpressionItem;
import net.sf.jsqlparser.statement.select.SelectItem;
import java.sql.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class ViewDictByRege {
private static Connection conn = null;
private static Map<String, List<String>> viewMapLists = new HashMap<>();
static {
try {
Class.forName("com.mysql.jdbc.Driver");
} catch (ClassNotFoundException e) {
throw new RuntimeException(e);
}
try {
conn = DriverManager.getConnection("jdbc:mysql://starip:9030/ic_cyb_cdm", "staruser", "123456");
} catch (SQLException e) {
throw new RuntimeException(e);
}
}
//1、show MATERIALIZED VIEW from ic_cyb_cdm where name like '%_v' 查询所有的物化视图
// 2、show create table ic_cyb_cdm.cyb_dwd_sales_week_global_v 查询字段构建map
// 3、解析出所有的底表
// 4、遍历底表构建所有字段的map,取有备注的赋值
// 5、比对给目标map赋值
// todo 待优化项,加数据库连接池
public static void main(String[] args) {
long start = System.currentTimeMillis();
getMeterialViewDDL();
long end = System.currentTimeMillis();
System.out.println("检测耗费时间"+(end-start));
}
public static Map<String, String> getMeterialViewDDL() {
PreparedStatement pst = null;
try {
pst = conn.prepareStatement("show MATERIALIZED VIEW from ic_cyb_cdm where name like '%_v'");
ResultSet rs = pst.executeQuery();
while (rs.next()) {
String viewName = "ic_cyb_cdm." + rs.getString(2);
String viewDdl = rs.getString(4).trim();
String viewLower = viewDdl.toLowerCase(Locale.ROOT);
if (viewLower != null) {
int select = viewLower.indexOf("select");
String viewSelect = viewLower.substring(select);
// inner table remarks
Set<String> viewUnderTables = parserTableName(viewSelect, viewName);
String colRemarksSql = getRemarksSql(viewUnderTables);
Map<String, String> innerRemarksMap = tableRemarks(colRemarksSql);
// star view remarks
String viewRemarksSql = getColRemarksSql(viewName);
Map<String, String> viewStarRemarksMap = tableRemarks(viewRemarksSql);
for (Map.Entry<String, String> entry : viewStarRemarksMap.entrySet()) {
String columnName = entry.getKey();
String columnRemark = entry.getValue();
if ("".equalsIgnoreCase(columnRemark)) {
if (innerRemarksMap.containsKey(columnName)) {
entry.setValue(innerRemarksMap.get(columnName));
} else if ("search_time".equalsIgnoreCase(columnName)) {
entry.setValue("搜索时间");
} else if ("uuid".equalsIgnoreCase(columnName)) {
entry.setValue("uuid");
} else {
entry.setValue("暂无备注名,请手动添加");
}
}
}
// pg view remarks
String pgViewColRemark = "select tfi.field ,tfi.notes from zcb_dev.t_table_info tti " +
" left join zcb_dev.t_field_info tfi on tti.id = tfi.table_id" +
" where tti.table_name = '" + viewName.split("\\.")[1] + "'";
Map<String, String> pgColRemark = pgTableRemark(pgViewColRemark);
if (pgColRemark.size() > viewStarRemarksMap.size()) {
// 说明有字段删除,匹配出来删除的字段做提示,手动人工pg库表去删除该字段
String delColumns = compareFieldMap(pgColRemark, viewStarRemarksMap);
System.out.println("需要在pg库t_filed_info表内删除视图:" + viewName + "的字段和备注有:" + delColumns);
} else if (pgColRemark.size() < viewStarRemarksMap.size()) {
// 说明有新增字段,新增字段直接插入
String allColumns = compareFieldMap(viewStarRemarksMap, pgColRemark);
System.out.println("需要在pg库t_filed_info表内添加视图:" + viewName + "的字段和备注有:" + allColumns);
} else {
// 字段数没有变化,如果pg字段为空,则做修改,不然不用修改
String diffRemarks = compareValueMap(viewStarRemarksMap, pgColRemark);
if(!"".equalsIgnoreCase(diffRemarks)){
System.out.println("需要在pg库t_filed_info表内修改视图:" + viewName + "的字段和备注有:" + diffRemarks);
}else {
System.out.println("pg库t_filed_info表内没有需要修改的字段和备注视图:" + viewName);
}
}
}
}
} catch (SQLException e) {
throw new RuntimeException(e);
} finally {
try {
pst.close();
conn.close();
} catch (SQLException e) {
throw new RuntimeException(e);
}
}
return null;
}
/**
* 比对map的值,如果map的值不一致,返回不一致的结构
*
* @param oriViewMap
* @param targetRemark
* @return
*/
private static String compareValueMap(Map<String, String> oriViewMap, Map<String, String> targetRemark) {
String diffRemarks = "";
for (Map.Entry<String, String> entry : oriViewMap.entrySet()) {
String columnName = entry.getKey();
String columnRemark = entry.getValue();
// 如果pg库里面是空
if("".equalsIgnoreCase(targetRemark.get(columnName)) || null == targetRemark.get(columnName)){
diffRemarks += "columnName:" + columnName + " view new remark: " + columnRemark + " pg column remark: " + targetRemark.get(columnName);
// System.out.println("columnName:" + columnName + " view new remark: " + columnRemark + " pg column remark: " + targetRemark.get(columnName));
}
}
return diffRemarks;
}
/**
* 原始map和目标map做比对,返回原始map中的不同的值
*
* @param oriMap
* @param targetMap
*/
private static String compareFieldMap(Map<String, String> oriMap, Map<String, String> targetMap) {
String compareResult = "";
for (Map.Entry<String, String> pgMap : oriMap.entrySet()) {
String colName = pgMap.getKey();
String colRemark = pgMap.getValue();
if (!targetMap.containsKey(colName)) {
compareResult += (" colName: " + colName + " remark: " + colRemark);
}
}
return compareResult;
}
//待优化
private static Map<String, String> pgTableRemark(String pgViewColRemark) {
Map<String, String> colRemarkMap = new HashMap<>();
try {
Class.forName("org.postgresql.Driver");
} catch (ClassNotFoundException e) {
throw new RuntimeException(e);
}
try {
Connection conn = DriverManager.getConnection("jdbc:postgresql://pgip:5432/z_test", "user", "#123");
Statement statement = conn.createStatement();
ResultSet rs = statement.executeQuery(pgViewColRemark);
while (rs.next()) {
String colName = rs.getString(1);
String colRemarks = rs.getString(2);
// System.out.println(colName + "----" + colRemarks);
colRemarkMap.put(colName, colRemarks);
}
statement.close();
conn.close();
} catch (SQLException e) {
throw new RuntimeException(e);
}
return colRemarkMap;
}
private static Map<String, String> tableRemarks(String colRemarksSql) throws SQLException {
PreparedStatement preparedColumns = conn.prepareStatement(colRemarksSql);
ResultSet remarkRs = preparedColumns.executeQuery();
Map<String, String> innerRemarks = new HashMap<>();
while (remarkRs.next()) {
String columnName = remarkRs.getString(3);
String columnRemarks = remarkRs.getString(4);
innerRemarks.put(columnName, columnRemarks);
}
try {
preparedColumns.close();
} catch (SQLException e) {
throw new RuntimeException(e);
}
return innerRemarks;
}
private static String getRemarksSql(Set<String> viewUnderTables) {
return getColRemarksSql(viewUnderTables);
}
// 目标获取 视图对应的底表名
public static Set<String> parserTableName(String sql, String viewName) {
// 使用正则表达式匹配表名称
Pattern pattern = Pattern.compile("(?:from|join)\\s+(\\w+.\\w+)");
Matcher matcher = pattern.matcher(sql.replaceAll("`", "").replaceAll("hive.", "").toLowerCase(Locale.ROOT));
Set<String> underTables = new HashSet<>();
while (matcher.find()) {
String tableName = matcher.group(1);
// System.out.println(viewName + " Regluar tableName: " + tableName);// ic_cyb_ods.cyb_ods_byd_week_production_ds_s
underTables.add(tableName);
}
return underTables;
}
/*select table_schema,table_name,COLUMN_NAME ,COLUMN_COMMENT from information_schema.columns where 1=1 and COLUMN_COMMENT is not null and COLUMN_COMMENT != '' and
((table_schema ='ic_cyb_ods' and TABLE_NAME ='cyb_ods_t_api_byd_used_car_ds_s') or (table_schema ='ic_cyb_cdm' and TABLE_NAME ='cyb_dwd_catarc_sales_business_v'))
*/
public static String getColRemarksSql(Set<String> innerTableSet) {
String whereSql = "select table_schema,table_name,COLUMN_NAME ,COLUMN_COMMENT from information_schema.columns where 1=1 and COLUMN_COMMENT is not null and COLUMN_COMMENT != '' ";
if (!innerTableSet.isEmpty()) {
whereSql += " and (";
String[] innerTables = innerTableSet.toArray(new String[innerTableSet.size()]);
for (int i = 0; i < innerTables.length; i++) {
String innerTable = innerTables[i];
String[] split = innerTable.split("\\.");
String schema = split[0];
String tableName = split[1];
if (i < innerTables.length - 1) {
whereSql += "(table_schema ='" + schema + "'" + " and TABLE_NAME = '" + tableName + "') or ";
} else {
whereSql += "(table_schema ='" + schema + "'" + " and TABLE_NAME = '" + tableName + "') ) ";
}
}
}
// System.out.println("备注Sql查询: " + whereSql);
return whereSql;
}
public static String getColRemarksSql(String wholeTableName) {
String whereSql = "select table_schema,table_name,COLUMN_NAME ,COLUMN_COMMENT " +
" from information_schema.columns where 1=1 ";
String[] split = wholeTableName.split("\\.");
String schema = split[0];
String tableName = split[1];
whereSql += " and table_schema ='" + schema + "'" + " and TABLE_NAME = '" + tableName + "' ";
return whereSql;
}
public static void getAllColumns(Select selectStatement, Map<String, String> aliasMap, Map<String, String> remarkMap) {
PlainSelect plainSelect = (PlainSelect) selectStatement.getSelectBody();
List<SelectItem> selectItems = plainSelect.getSelectItems();
// System.out.println(((SelectExpressionItem)selectItems.get(0)).getAlias().getName());
//获取字段名
System.out.println("====================");
for (SelectItem selectItem : selectItems) {
// SimpleNode astNode = selectItem.
// 处理每个选项,这里假设只有一列为ColumnSelectExpression。
if (selectItem instanceof SelectExpressionItem) {
SelectExpressionItem expressionItem = (SelectExpressionItem) selectItem;
// 获取列名
String columnName = expressionItem.getExpression().toString();
String columnNameOri = null;
// 包含函数的字段肯定是加工出来的,这种备注在原表内无法匹配到需要手动维护备注名称,不包含函数的是可以原表内找到字段的
if (columnName.contains(".") && !columnName.contains("(")) {
int split_char = columnName.lastIndexOf(".");
columnNameOri = columnName.substring(split_char + 1);
} else {
columnNameOri = columnName;
}
Alias alias = expressionItem.getAlias();
if (alias != null) {
// 带有别名的处理 Column original Name: `order_num` aliasName:`order_id`
String aliasName = alias.getName();
aliasMap.put(aliasName, columnNameOri);
remarkMap.put(aliasName, "无");//暂时为空
// System.out.println("Column Name: " + columnName + " Column original Name: " + columnNameOri + " aliasName:" + alias.getName());
} else {
remarkMap.put(columnNameOri, "无");//暂时为空
// System.out.println("Column Name: " + columnNameOri);
}
}
}
}
}
标签:Map,StarRocks,String,System,雾化,columnName,视图,table,schema
From: https://www.cnblogs.com/hbym/p/17565044.html