-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathchecks.sql
More file actions
executable file
·43 lines (39 loc) · 1.56 KB
/
checks.sql
File metadata and controls
executable file
·43 lines (39 loc) · 1.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
-- 1. Inconsistent Quantity (Critical)
-- Verifies that cancellations map to negative quantities, and normal sales map to positive.
SELECT
COUNT(*) AS invalid_quantity_count,
ROUND(100.0 * COUNT(*) / (SELECT COUNT(*) FROM data), 2) AS failure_percentage
FROM data
WHERE (Quantity > 0 AND InvoiceNo LIKE 'C%')
OR (Quantity <= 0 AND InvoiceNo NOT LIKE 'C%');
-- 2. Non-Positive UnitPrice (Critical)
-- Verifies that unit prices are greater than zero to protect revenue metrics.
SELECT
COUNT(*) AS invalid_price_count,
ROUND(100.0 * COUNT(*) / (SELECT COUNT(*) FROM data), 2) AS failure_percentage
FROM data
WHERE UnitPrice <= 0;
-- 3. Missing CustomerID (Warning)
-- Flags missing customer IDs (likely guest checkouts) for downstream cohort models.
SELECT
COUNT(*) AS missing_customer_count,
ROUND(100.0 * COUNT(*) / (SELECT COUNT(*) FROM data), 2) AS failure_percentage
FROM data
WHERE CustomerID IS NULL;
-- 4. Duplicate Invoice Lines (Critical)
-- Identifies exact row duplicates across core transactional columns.
SELECT
COUNT(*) AS duplicate_row_count
FROM (
SELECT InvoiceNo, StockCode, Quantity, UnitPrice, CustomerID
FROM data
GROUP BY InvoiceNo, StockCode, Quantity, UnitPrice, CustomerID
HAVING COUNT(*) > 1
) subquery;
-- 5. Missing Product Description (Warning)
-- Identifies records where the product description is missing or blank, impacting product-level reporting.
SELECT
COUNT(*) AS missing_description_count,
ROUND(100.0 * COUNT(*) / (SELECT COUNT(*) FROM data), 2) AS failure_percentage
FROM data
WHERE Description IS NULL;