在数据仓库和ETL(提取、转换、加载)过程中,SQL Server 数据库是一个广泛使用的平台。ETL工程师需要熟练掌握SQL语句,以便有效地从源系统中提取数据,进行转换,并将其加载到目标数据库中。本文将介绍一些ETL工程师常用的SQL Server数据库SQL语句。
1. 数据提取(Extract)
查询特定列
SELECT Column1, Column2, Column3 FROM YourTable;
查询特定行
SELECT * FROM YourTable WHERE Condition;
使用JOIN连接多个表
SELECT Table1.Column, Table2.Column FROM Table1 JOIN Table2 ON Table1.JoinColumn = Table2.JoinColumn;
使用子查询
SELECT * FROM YourTable WHERE Column IN (SELECT Column FROM AnotherTable);
2. 数据转换(Transform)
数据类型转换
SELECT CAST(Column AS DataType) AS NewColumnName FROM YourTable;
字符串处理
- 大小写转换
SELECT UPPER(Column) AS UpperCase, LOWER(Column) AS LowerCase FROM YourTable;
- 字符串拼接
SELECT CONCAT(Column1, Column2) AS ConcatenatedColumn FROM YourTable;
- 字符串截取
SELECT LEFT(Column, Length) AS LeftSubstring, RIGHT(Column, Length) AS RightSubstring FROM YourTable;
日期处理
- 日期格式化
SELECT FORMAT(Column, 'yyyy-MM-dd') AS FormattedDate FROM YourTable;
- 日期加减
SELECT DATEADD(day, Number, Column) AS NewDate FROM YourTable;
3. 数据加载(Load)
插入新数据
INSERT INTO YourTable (Column1, Column2) VALUES (Value1, Value2);
更新现有数据
UPDATE YourTable SET Column1 = Value1, Column2 = Value2 WHERE Condition;
删除数据
DELETE FROM YourTable WHERE Condition;
批量插入数据
INSERT INTO YourTable (Column1, Column2) VALUES (Value1a, Value2a), (Value1b, Value2b), (Value1c, Value2c);
4. 性能优化
使用索引
CREATE INDEX IndexName ON YourTable (ColumnName);
分析查询性能
SET STATISTICS TIME ON; SELECT * FROM YourTable; SET STATISTICS TIME OFF;
5. 其他实用语句
查看表结构
EXEC sp_columns YourTable;
查看索引信息
EXEC sp_helpindex YourTable;
查看数据库文件大小
SELECT
DB_NAME(database_id) AS DatabaseName,
Name AS FileName,
type_desc AS FileType,
size/128.0 AS CurrentSizeMB,
size/128.0 - CAST(FILEPROPERTY(name, 'SpaceUsed') AS int)/128.0 AS FreeSpaceMB
FROM sys.master_files
WHERE database_id = DB_ID('YourDatabaseName');