Data Integrity Auditor
Detect and fix data integrity issues automatically.
Integrity Check Types 1. Orphaned Records -- Find orphaned orders (no matching user) SELECT o.id, o.user_id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL;
-- Find orphaned order items (no matching order) SELECT oi.id, oi.order_id FROM order_items oi LEFT JOIN orders o ON o.id = oi.order_id WHERE o.id IS NULL;
- Broken Foreign Keys // scripts/check-foreign-keys.ts async function checkForeignKeys() { const issues: string[] = [];
// Orders → Users
const orphanedOrders = await prisma.$queryRawSELECT o.id, o.user_id
FROM orders o
LEFT JOIN users u ON u.id = o.user_id
WHERE u.id IS NULL
LIMIT 100;
if (orphanedOrders.length > 0) {
issues.push(
❌ Found ${orphanedOrders.length} orders with invalid user_id
);
console.log(
" Sample IDs:",
orphanedOrders.slice(0, 5).map((o) => o.id)
);
}
// Order Items → Orders
const orphanedItems = await prisma.$queryRawSELECT oi.id, oi.order_id
FROM order_items oi
LEFT JOIN orders o ON o.id = oi.order_id
WHERE o.id IS NULL
LIMIT 100;
if (orphanedItems.length > 0) {
issues.push(
❌ Found ${orphanedItems.length} order items with invalid order_id
);
}
// Products → Categories
const orphanedProducts = await prisma.$queryRawSELECT p.id, p.category_id
FROM products p
LEFT JOIN categories c ON c.id = p.category_id
WHERE p.category_id IS NOT NULL
AND c.id IS NULL
LIMIT 100;
if (orphanedProducts.length > 0) {
issues.push(
❌ Found ${orphanedProducts.length} products with invalid category_id
);
}
return issues; }
- Constraint Violations async function checkConstraints() { const issues: string[] = [];
// Check email uniqueness (should be caught by DB, but verify)
const duplicateEmails = await prisma.$queryRawSELECT email, COUNT(*) as count
FROM users
GROUP BY email
HAVING COUNT(*) > 1;
if (duplicateEmails.length > 0) {
issues.push(❌ Found ${duplicateEmails.length} duplicate emails);
}
// Check negative quantities
const negativeStock = await prisma.$queryRawSELECT id, name, stock
FROM products
WHERE stock < 0;
if (negativeStock.length > 0) {
issues.push(
❌ Found ${negativeStock.length} products with negative stock
);
}
// Check negative prices
const negativePrices = await prisma.$queryRawSELECT id, name, price
FROM products
WHERE price < 0;
if (negativePrices.length > 0) {
issues.push(
❌ Found ${negativePrices.length} products with negative prices
);
}
// Check invalid order status
const invalidStatus = await prisma.$queryRawSELECT id, status
FROM orders
WHERE status NOT IN ('pending', 'paid', 'shipped', 'delivered', 'cancelled');
if (invalidStatus.length > 0) {
issues.push(❌ Found ${invalidStatus.length} orders with invalid status);
}
return issues; }
- Missing Required Fields async function checkMissingFields() { const issues: string[] = [];
// Users missing required fields const usersNoEmail = await prisma.user.count({ where: { email: null }, });
if (usersNoEmail > 0) {
issues.push(❌ Found ${usersNoEmail} users without email);
}
// Orders with NULL totals const ordersNoTotal = await prisma.order.count({ where: { total: null }, });
if (ordersNoTotal > 0) {
issues.push(❌ Found ${ordersNoTotal} orders without total);
}
return issues; }
Comprehensive Audit Script // scripts/audit-data-integrity.ts interface IntegrityIssue { severity: "critical" | "warning" | "info"; category: string; message: string; count: number; query?: string; fix?: string; }
async function auditDataIntegrity(): Promise
console.log("🔍 Auditing data integrity...\n");
// 1. Check orphaned records
const orphanedOrders = await prisma.$queryRawSELECT COUNT(*) as count FROM orders o
LEFT JOIN users u ON u.id = o.user_id
WHERE u.id IS NULL;
if (orphanedOrders[0].count > 0) { issues.push({ severity: "critical", category: "orphaned-records", message: "Orders with invalid user references", count: orphanedOrders[0].count, query: "SELECT id, user_id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL", fix: "DELETE FROM orders WHERE id IN (...)", }); }
// 2. Check duplicate unique constraints
const duplicateEmails = await prisma.$queryRawSELECT email, COUNT(*) as count
FROM users
GROUP BY email
HAVING COUNT(*) > 1;
if (duplicateEmails.length > 0) { issues.push({ severity: "critical", category: "constraint-violation", message: "Duplicate email addresses", count: duplicateEmails.length, fix: "Keep newest record, delete duplicates", }); }
// 3. Check data inconsistencies
const invalidPrices = await prisma.$queryRawSELECT COUNT(*) as count FROM products WHERE price < 0;
if (invalidPrices[0].count > 0) { issues.push({ severity: "warning", category: "data-quality", message: "Products with negative prices", count: invalidPrices[0].count, fix: "UPDATE products SET price = ABS(price) WHERE price < 0", }); }
// 4. Check referential integrity
const brokenOrderItems = await prisma.$queryRawSELECT COUNT(*) as count FROM order_items oi
LEFT JOIN orders o ON o.id = oi.order_id
WHERE o.id IS NULL;
if (brokenOrderItems[0].count > 0) { issues.push({ severity: "critical", category: "referential-integrity", message: "Order items referencing non-existent orders", count: brokenOrderItems[0].count, fix: "DELETE FROM order_items WHERE order_id NOT IN (SELECT id FROM orders)", }); }
return issues; }
async function generateReport() { const issues = await auditDataIntegrity();
console.log("\n📊 Data Integrity Report\n");
console.log(Total issues: ${issues.length}\n);
const grouped = issues.reduce((acc, issue) => {
if (!acc[issue.severity]) acc[issue.severity] = [];
acc[issue.severity].push(issue);
return acc;
}, {} as Record
(["critical", "warning", "info"] as const).forEach((severity) => { const items = grouped[severity] || []; if (items.length === 0) return;
console.log(`\n${severity.toUpperCase()} (${items.length})\n`);
items.forEach((issue, i) => {
console.log(`${i + 1}. [${issue.category}] ${issue.message}`);
console.log(` Count: ${issue.count}`);
if (issue.query) {
console.log(` Query: ${issue.query.substring(0, 80)}...`);
}
if (issue.fix) {
console.log(` Fix: ${issue.fix}`);
}
console.log();
});
});
// Exit with error if critical issues process.exit(grouped.critical?.length > 0 ? 1 : 0); }
generateReport();
Automated Fixes // scripts/fix-integrity-issues.ts async function fixOrphanedRecords() { console.log("🔧 Fixing orphaned records...\n");
// Delete orphaned orders
const deletedOrders = await prisma.$executeRawDELETE FROM orders
WHERE id IN (
SELECT o.id FROM orders o
LEFT JOIN users u ON u.id = o.user_id
WHERE u.id IS NULL
);
console.log(✅ Deleted ${deletedOrders} orphaned orders);
// Delete orphaned order items
const deletedItems = await prisma.$executeRawDELETE FROM order_items
WHERE id IN (
SELECT oi.id FROM order_items oi
LEFT JOIN orders o ON o.id = oi.order_id
WHERE o.id IS NULL
);
console.log(✅ Deleted ${deletedItems} orphaned order items);
}
async function fixDuplicates() { console.log("🔧 Fixing duplicate records...\n");
// Keep newest user, delete old duplicates
await prisma.$executeRawDELETE FROM users
WHERE id IN (
SELECT id FROM (
SELECT id,
ROW_NUMBER() OVER (PARTITION BY email ORDER BY created_at DESC) as rn
FROM users
) t
WHERE rn > 1
);
console.log(✅ Fixed duplicate emails);
}
async function fixConstraintViolations() { console.log("🔧 Fixing constraint violations...\n");
// Fix negative prices
const fixedPrices = await prisma.$executeRawUPDATE products
SET price = ABS(price)
WHERE price < 0;
console.log(✅ Fixed ${fixedPrices} negative prices);
// Fix negative stock
const fixedStock = await prisma.$executeRawUPDATE products
SET stock = 0
WHERE stock < 0;
console.log(✅ Fixed ${fixedStock} negative stock values);
}
Prevention: Add Missing Constraints -- Migration to add missing constraints
-- 1. Add foreign key constraints ALTER TABLE orders ADD CONSTRAINT fk_orders_user_id FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE;
ALTER TABLE order_items ADD CONSTRAINT fk_order_items_order_id FOREIGN KEY (order_id) REFERENCES orders(id) ON DELETE CASCADE;
-- 2. Add check constraints ALTER TABLE products ADD CONSTRAINT chk_products_price_positive CHECK (price >= 0);
ALTER TABLE products ADD CONSTRAINT chk_products_stock_non_negative CHECK (stock >= 0);
-- 3. Add unique constraints CREATE UNIQUE INDEX idx_users_email_unique ON users(LOWER(email));
-- 4. Add NOT NULL constraints ALTER TABLE users ALTER COLUMN email SET NOT NULL;
ALTER TABLE orders ALTER COLUMN total SET NOT NULL;
Automated Testing // tests/data-integrity.test.ts describe("Data Integrity", () => { it("should not allow orphaned orders", async () => { // Try to create order with non-existent user await expect( prisma.order.create({ data: { userId: 99999, // Non-existent total: 100, status: "pending", }, }) ).rejects.toThrow("Foreign key constraint"); });
it("should not allow negative prices", async () => { await expect( prisma.product.create({ data: { name: "Test", price: -10, // Invalid stock: 100, }, }) ).rejects.toThrow("Check constraint"); });
it("should not allow duplicate emails", async () => { await prisma.user.create({ data: { email: "test@example.com", name: "Test" }, });
await expect(
prisma.user.create({
data: { email: "test@example.com", name: "Test 2" },
})
).rejects.toThrow("Unique constraint");
}); });
Monitoring Dashboard
// Monitor data quality metrics
async function getDataQualityMetrics() {
return {
orphanedOrders: await prisma.$queryRawSELECT COUNT(*) FROM orders o
LEFT JOIN users u ON u.id = o.user_id
WHERE u.id IS NULL,
duplicateEmails: await prisma.$queryRawSELECT COUNT(*) FROM (
SELECT email FROM users GROUP BY email HAVING COUNT(*) > 1
) t,
invalidPrices: await prisma.$queryRawSELECT COUNT(*) FROM products WHERE price < 0,
missingData: await prisma.$queryRawSELECT
SUM(CASE WHEN email IS NULL THEN 1 ELSE 0 END) as users_no_email,
SUM(CASE WHEN total IS NULL THEN 1 ELSE 0 END) as orders_no_total
FROM users
CROSS JOIN orders,
};
}
Best Practices Add constraints: Prevent issues at database level Regular audits: Weekly integrity checks Automated fixes: Safe, reversible repairs Monitor metrics: Track data quality over time Test constraints: Ensure they work Soft deletes: Easier recovery Backup before fixes: Always Output Checklist Orphaned record detection Foreign key integrity checks Constraint violation detection Missing field checks Automated audit script Fix scripts (with dry-run) Prevention migrations (add constraints) Automated tests Monitoring dashboard Regular audit schedule