Introduction
Maintaining data quality in your application is essential to prevent issues such as form access failures or deployment errors. This article outlines a step-by-step process for conducting a sanity check on data quality across all partitions and provides guidance on resolving invalid records.
Step 1. Create a store procedure
To automate the data quality check, create a stored procedure in SQL Server.
-
Open SQL Server Management Studio.
-
Connect to the target database instance.
-
Open a New Query window and switch the context to the target application database.
-
Execute the following SQL statement to create the stored procedure.
IF SCHEMA_ID(N'devops') IS NULL
BEGIN
EXEC('CREATE SCHEMA devops');
END;
GO
CREATE OR ALTER PROCEDURE [devops].[sp_RunDataQualityCheck]
AS
BEGIN
SET NOCOUNT ON;
DECLARE @SqlCmd NVARCHAR(MAX);
DECLARE @ModelName NVARCHAR(255), @MeasureGroupName NVARCHAR(255), @PartitionName NVARCHAR(255), @Source NVARCHAR(255), @StorageMode NVARCHAR(255);
DECLARE @SchemaName NVARCHAR(255), @TableOrViewName NVARCHAR(255), @IsRulePartition BIT;
DECLARE @DefinitionError BIT, @TotalRows INT, @ElapsedSeconds INT;
DECLARE @NullValueAndCommentCount INT, @NullMemberIdCount INT, @DimensionMemberNaCount INT = 0, @HierarchyMemberNaCount INT = 0;
DROP TABLE IF EXISTS #Errors;
CREATE TABLE #Errors (
[model_name] NVARCHAR(255),
[measure_group_name] NVARCHAR(255),
[partition_name] NVARCHAR(255),
[source] NVARCHAR(255),
[storage_mode] NVARCHAR(255),
[definition_error] BIT,
[total_rows] INT,
[timing_seconds] INT,
[null_value_and_comment_count] INT,
[null_memberId_count] INT,
[dimension_member_na_count] INT,
[hierarchy_member_na_count] INT
);
DROP TABLE IF EXISTS #SQLNames;
CREATE TABLE #SQLNames (
[dimension_name] NVARCHAR(MAX),
[hierarchy_name] NVARCHAR(MAX),
[column_name] NVARCHAR(MAX)
);
DECLARE CUR CURSOR LOCAL FAST_FORWARD FOR
SELECT M.[ModelName], P.[MeasureGroupName], P.[PartitionName], [Source], P.[StorageMode], 0 as [IsRule]
FROM [dbo].[Partitions] P
INNER JOIN [dbo].[Models] M ON P.[ModelID] = M.[ModelID]
UNION
SELECT M.ModelName, 'CoreMG', 'Rule', '[dbo].[V_R_Model_' + M.ModelName + ']', 'ROLAP', 1 as [IsRule]
FROM [dbo].[Models] M;
OPEN CUR;
FETCH NEXT FROM CUR INTO @ModelName, @MeasureGroupName, @PartitionName, @Source, @StorageMode, @IsRulePartition;
WHILE @@FETCH_STATUS = 0
BEGIN
PRINT CONCAT('Processing table/view: ', @Source);
SET @TotalRows = 0;
SET @DefinitionError = 0;
SET @ElapsedSeconds = 0;
SET @NullValueAndCommentCount = 0;
SET @NullMemberIdCount = 0;
SET @DimensionMemberNaCount = 0;
SET @HierarchyMemberNaCount = 0;
IF OBJECT_ID(@Source) IS NULL
BEGIN
PRINT ('Table/view not found. Skipping.');
FETCH NEXT FROM CUR INTO @ModelName, @MeasureGroupName, @PartitionName, @Source, @StorageMode, @IsRulePartition;
CONTINUE;
END
-- Parse schema and table/view names
SET @SchemaName = ISNULL(NULLIF(PARSENAME(REPLACE(@Source, '\[', ''), 2), ''), 'dbo');
SET @TableOrViewName = PARSENAME(REPLACE(@Source, '\[', ''), 1);
DECLARE @StartTime DATETIME = GETDATE();
DECLARE @HasValueColumn BIT = 0, @HasCommentColumn BIT = 0;
-- Check if 'Value' and 'Comment' columns exist
SELECT @HasValueColumn = COUNT(1)
FROM [INFORMATION_SCHEMA].[COLUMNS]
WHERE TABLE_NAME = @TableOrViewName AND TABLE_SCHEMA = @SchemaName AND COLUMN_NAME = 'Value';
SELECT @HasCommentColumn = COUNT(1)
FROM [INFORMATION_SCHEMA].[COLUMNS]
WHERE TABLE_NAME = @TableOrViewName AND TABLE_SCHEMA = @SchemaName AND COLUMN_NAME = 'Comment';
-- Identify dimension-related columns
DECLARE @Columns TABLE ([ColumnName] NVARCHAR(MAX));
DELETE @Columns;
INSERT INTO @Columns
SELECT DISTINCT [COLUMN_NAME]
FROM [INFORMATION_SCHEMA].[COLUMNS]
WHERE TABLE_NAME = @TableOrViewName
AND TABLE_SCHEMA = @SchemaName
AND COLUMN_NAME IN (SELECT [DimensionName] + 'ID' FROM [Dimensions]);
-- Build dimension/hierarchy names
TRUNCATE TABLE #SQLNames;
INSERT INTO #SQLNames (dimension_name, hierarchy_name, column_name)
SELECT DISTINCT
CONCAT('[D_', D.DimensionName, ']') AS dimension_name,
CONCAT('[H_', D.DimensionName, '_', H.HierarchyName, ']') AS hierarchy_name,
MD.ModelDimName + 'ID' AS column_name
FROM [Partitions] P
INNER JOIN [ModelDimensions] MD ON P.ModelID = MD.ModelID
INNER JOIN Hierarchies H ON MD.HierarchyID = H.HierarchyID
INNER JOIN Dimensions D ON D.DimensionID = MD.DimensionID
INNER JOIN @Columns C ON C.ColumnName = MD.ModelDimName + 'ID'
WHERE P.[Source] = @Source;
-- Construct dynamic joins
DECLARE @Dimension_join_string NVARCHAR(MAX) = COALESCE((
SELECT STRING_AGG('LEFT JOIN ' + dimension_name + ' ON ' + dimension_name + '.[MemberId] = F.[' + column_name + ']', CHAR(10))
FROM #SQLNames), '');
DECLARE @Hierarchy_join_string NVARCHAR(MAX) = COALESCE((
SELECT STRING_AGG('LEFT JOIN ' + hierarchy_name + ' ON ' + hierarchy_name + '.[MemberId] = F.[' + column_name + ']', CHAR(10))
FROM #SQLNames), '');
SET @StartTime = GETDATE();
BEGIN TRY
SET @SqlCmd = N'SELECT @RowCount = COUNT(1) FROM ' + @Source;
EXEC sp_executesql @SqlCmd, N'@RowCount INT OUTPUT', @TotalRows OUTPUT;
SET @ElapsedSeconds = DATEDIFF(SECOND, @StartTime, GETDATE());
IF (@HasValueColumn = 1 AND @HasCommentColumn = 1)
BEGIN
SET @SqlCmd = N'SELECT @Count = COUNT(1) FROM ' + @Source + ' WHERE [Value] IS NULL AND [Comment] IS NULL';
EXEC sp_executesql @SqlCmd,
N'@Count INT OUTPUT', @NullValueAndCommentCount OUTPUT;
END
ELSE IF (@HasValueColumn = 1)
BEGIN
SET @SqlCmd = N'SELECT @Count = COUNT(1) FROM ' + @Source + ' WHERE [Value] IS NULL';
EXEC sp_executesql @SqlCmd,
N'@Count INT OUTPUT', @NullValueAndCommentCount OUTPUT;
END
IF EXISTS (SELECT 1 FROM #SQLNames)
BEGIN
DECLARE @Null_Value_Condition NVARCHAR(MAX) = (SELECT STRING_AGG('F.[' + column_name + '] IS NULL', ' OR ') FROM #SQLNames);
SET @SqlCmd = N'SELECT @Count = COUNT(1) FROM ' + @Source + ' F WHERE ' + @Null_Value_Condition;
EXEC sp_executesql @SqlCmd,
N'@Count INT OUTPUT', @NullMemberIdCount OUTPUT;
END
IF EXISTS (SELECT 1 FROM #SQLNames)
BEGIN
DECLARE @Dimension_Member_NA_Condition NVARCHAR(MAX) = (SELECT STRING_AGG(dimension_name + '.[MemberId] IS NULL', ' OR ') FROM #SQLNames);
SET @SqlCmd = N'SELECT @Count = COUNT(1) FROM ' + @Source + ' F ' + @Dimension_join_string + ' WHERE ' + @Dimension_Member_NA_Condition;
EXEC sp_executesql @SqlCmd,
N'@Count INT OUTPUT', @DimensionMemberNaCount OUTPUT;
DECLARE @Hierarchy_Member_NA_Condition NVARCHAR(MAX) = (SELECT STRING_AGG(hierarchy_name + '.[MemberId] IS NULL', ' OR ') FROM #SQLNames);
SET @SqlCmd = N'SELECT @Count = COUNT(1) FROM ' + @Source + ' F ' + @Hierarchy_join_string + ' WHERE ' + @Hierarchy_Member_NA_Condition;
EXEC sp_executesql @SqlCmd,
N'@Count INT OUTPUT', @HierarchyMemberNaCount OUTPUT;
END
END TRY
BEGIN CATCH
--PRINT @SQLCMD
SET @DefinitionError = 1;
END CATCH
/* Insert results */
INSERT INTO #Errors
VALUES (@ModelName, @MeasureGroupName, @PartitionName, @Source, @StorageMode, @DefinitionError,
@TotalRows, @ElapsedSeconds, @NullValueAndCommentCount, @NullMemberIdCount,
@DimensionMemberNaCount, @HierarchyMemberNaCount);
FETCH NEXT FROM CUR INTO @ModelName, @MeasureGroupName, @PartitionName, @Source, @StorageMode, @IsRulePartition;
END
CLOSE CUR;
DEALLOCATE CUR;
IF OBJECT_ID('[devops].[DataQualityCheck]') IS NOT NULL
DROP TABLE [devops].[DataQualityCheck];
SELECT * INTO [devops].[DataQualityCheck]
FROM #Errors
ORDER BY
definition_error DESC,
null_memberId_count DESC,
dimension_member_na_count DESC,
hierarchy_member_na_count DESC;
SELECT *
FROM [devops].[DataQualityCheck];
END
Step 2. Run the data quality check
Once the stored procedure is created, execute the following SQL statement to check the data quality.
EXEC [devops].[sp_RunDataQualityCheck]
Note: Once the [devops].[sp_RunDataQualityCheck]
stored procedure is available in your application, you can rerun it at any time. Each execution will analyze the latest data quality in your application, ensuring continuous monitoring and improvement.
This query will generate a report displaying data quality results for your application.
The result will contain a table with the following columns.
Column | Description |
---|---|
model_name | Display name of the model. |
measure_group_name | Display name of the measure group. |
partition_name | Display name of the partition. |
source | Source table or view of the partition. |
storage_mode | Storage mode of the partition (ROLAP or MOLAP). |
definition_error | Indicates any definition errors in the partition. |
total_rows | Total number of records in the partition. |
timing_seconds | Total time taken to retrieve all records in the partition (in seconds). |
null_value_and_comment_count | Number of records with NULL values in the Value and Comment columns. |
null_memberId_count | Number of records with NULL values in the dimension columns. |
dimension_member_na_count | Number of records referring to dimension members that do not exist in the dimension. |
hierarchy_member_na_count | Number of records referring to members that exist in the dimension but not in the member list. |
Step 3. Understand the results
Below are common data quality issues, their descriptions, and recommended resolutions:
null_value_and_comment_count
- Description: Number of records in the partition where both Value and Comment columns contain NULL values.
- Resolution: Delete the records.
null_memberId_count
- Description: Number of records in the partition where dimension columns contain NULL values, often due to misconfigured data rules.
- Resolution: Navigate to the specific partition and root cause the issue. Delete the problematic records.
dimension_member_na_count
- Description: Number of records in the partition with dimension columns referring to members that do not exist in the dimension.
- Resolution: Navigate to the specific partition and root cause the issue. Identify the missing members:
-
If they were removed unintentionally, add them back.
-
Otherwise, delete the invalid records in the partition.
-
hierarchy_member_na_count
- Description: Number of records referring to members that exist in the dimension but are missing from the member list.
- Resolution: Navigate to the specific partition and root cause the issue. Identify the missing members:
-
If the members should exist, add them to the member list.
-
Otherwise, delete the invalid records from the partition.
-