data-integrity-auditor

安装量: 44
排名: #16819

安装

npx skills add https://github.com/patricio0312rev/skills --skill data-integrity-auditor

Data Integrity Auditor

Detect and fix data integrity issues automatically.

Integrity Check Types 1. Orphaned Records -- Find orphaned orders (no matching user) SELECT o.id, o.user_id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL;

-- Find orphaned order items (no matching order) SELECT oi.id, oi.order_id FROM order_items oi LEFT JOIN orders o ON o.id = oi.order_id WHERE o.id IS NULL;

  1. Broken Foreign Keys // scripts/check-foreign-keys.ts async function checkForeignKeys() { const issues: string[] = [];

// Orders → Users const orphanedOrders = await prisma.$queryRawSELECT o.id, o.user_id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL LIMIT 100;

if (orphanedOrders.length > 0) { issues.push( ❌ Found ${orphanedOrders.length} orders with invalid user_id ); console.log( " Sample IDs:", orphanedOrders.slice(0, 5).map((o) => o.id) ); }

// Order Items → Orders const orphanedItems = await prisma.$queryRawSELECT oi.id, oi.order_id FROM order_items oi LEFT JOIN orders o ON o.id = oi.order_id WHERE o.id IS NULL LIMIT 100;

if (orphanedItems.length > 0) { issues.push( ❌ Found ${orphanedItems.length} order items with invalid order_id ); }

// Products → Categories const orphanedProducts = await prisma.$queryRawSELECT p.id, p.category_id FROM products p LEFT JOIN categories c ON c.id = p.category_id WHERE p.category_id IS NOT NULL AND c.id IS NULL LIMIT 100;

if (orphanedProducts.length > 0) { issues.push( ❌ Found ${orphanedProducts.length} products with invalid category_id ); }

return issues; }

  1. Constraint Violations async function checkConstraints() { const issues: string[] = [];

// Check email uniqueness (should be caught by DB, but verify) const duplicateEmails = await prisma.$queryRawSELECT email, COUNT(*) as count FROM users GROUP BY email HAVING COUNT(*) > 1;

if (duplicateEmails.length > 0) { issues.push(❌ Found ${duplicateEmails.length} duplicate emails); }

// Check negative quantities const negativeStock = await prisma.$queryRawSELECT id, name, stock FROM products WHERE stock < 0;

if (negativeStock.length > 0) { issues.push( ❌ Found ${negativeStock.length} products with negative stock ); }

// Check negative prices const negativePrices = await prisma.$queryRawSELECT id, name, price FROM products WHERE price < 0;

if (negativePrices.length > 0) { issues.push( ❌ Found ${negativePrices.length} products with negative prices ); }

// Check invalid order status const invalidStatus = await prisma.$queryRawSELECT id, status FROM orders WHERE status NOT IN ('pending', 'paid', 'shipped', 'delivered', 'cancelled');

if (invalidStatus.length > 0) { issues.push(❌ Found ${invalidStatus.length} orders with invalid status); }

return issues; }

  1. Missing Required Fields async function checkMissingFields() { const issues: string[] = [];

// Users missing required fields const usersNoEmail = await prisma.user.count({ where: { email: null }, });

if (usersNoEmail > 0) { issues.push(❌ Found ${usersNoEmail} users without email); }

// Orders with NULL totals const ordersNoTotal = await prisma.order.count({ where: { total: null }, });

if (ordersNoTotal > 0) { issues.push(❌ Found ${ordersNoTotal} orders without total); }

return issues; }

Comprehensive Audit Script // scripts/audit-data-integrity.ts interface IntegrityIssue { severity: "critical" | "warning" | "info"; category: string; message: string; count: number; query?: string; fix?: string; }

async function auditDataIntegrity(): Promise { const issues: IntegrityIssue[] = [];

console.log("🔍 Auditing data integrity...\n");

// 1. Check orphaned records const orphanedOrders = await prisma.$queryRawSELECT COUNT(*) as count FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL;

if (orphanedOrders[0].count > 0) { issues.push({ severity: "critical", category: "orphaned-records", message: "Orders with invalid user references", count: orphanedOrders[0].count, query: "SELECT id, user_id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL", fix: "DELETE FROM orders WHERE id IN (...)", }); }

// 2. Check duplicate unique constraints const duplicateEmails = await prisma.$queryRawSELECT email, COUNT(*) as count FROM users GROUP BY email HAVING COUNT(*) > 1;

if (duplicateEmails.length > 0) { issues.push({ severity: "critical", category: "constraint-violation", message: "Duplicate email addresses", count: duplicateEmails.length, fix: "Keep newest record, delete duplicates", }); }

// 3. Check data inconsistencies const invalidPrices = await prisma.$queryRawSELECT COUNT(*) as count FROM products WHERE price < 0;

if (invalidPrices[0].count > 0) { issues.push({ severity: "warning", category: "data-quality", message: "Products with negative prices", count: invalidPrices[0].count, fix: "UPDATE products SET price = ABS(price) WHERE price < 0", }); }

// 4. Check referential integrity const brokenOrderItems = await prisma.$queryRawSELECT COUNT(*) as count FROM order_items oi LEFT JOIN orders o ON o.id = oi.order_id WHERE o.id IS NULL;

if (brokenOrderItems[0].count > 0) { issues.push({ severity: "critical", category: "referential-integrity", message: "Order items referencing non-existent orders", count: brokenOrderItems[0].count, fix: "DELETE FROM order_items WHERE order_id NOT IN (SELECT id FROM orders)", }); }

return issues; }

async function generateReport() { const issues = await auditDataIntegrity();

console.log("\n📊 Data Integrity Report\n"); console.log(Total issues: ${issues.length}\n);

const grouped = issues.reduce((acc, issue) => { if (!acc[issue.severity]) acc[issue.severity] = []; acc[issue.severity].push(issue); return acc; }, {} as Record);

(["critical", "warning", "info"] as const).forEach((severity) => { const items = grouped[severity] || []; if (items.length === 0) return;

console.log(`\n${severity.toUpperCase()} (${items.length})\n`);

items.forEach((issue, i) => {
  console.log(`${i + 1}. [${issue.category}] ${issue.message}`);
  console.log(`   Count: ${issue.count}`);
  if (issue.query) {
    console.log(`   Query: ${issue.query.substring(0, 80)}...`);
  }
  if (issue.fix) {
    console.log(`   Fix: ${issue.fix}`);
  }
  console.log();
});

});

// Exit with error if critical issues process.exit(grouped.critical?.length > 0 ? 1 : 0); }

generateReport();

Automated Fixes // scripts/fix-integrity-issues.ts async function fixOrphanedRecords() { console.log("🔧 Fixing orphaned records...\n");

// Delete orphaned orders const deletedOrders = await prisma.$executeRawDELETE FROM orders WHERE id IN ( SELECT o.id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL ); console.log(✅ Deleted ${deletedOrders} orphaned orders);

// Delete orphaned order items const deletedItems = await prisma.$executeRawDELETE FROM order_items WHERE id IN ( SELECT oi.id FROM order_items oi LEFT JOIN orders o ON o.id = oi.order_id WHERE o.id IS NULL ); console.log(✅ Deleted ${deletedItems} orphaned order items); }

async function fixDuplicates() { console.log("🔧 Fixing duplicate records...\n");

// Keep newest user, delete old duplicates await prisma.$executeRawDELETE FROM users WHERE id IN ( SELECT id FROM ( SELECT id, ROW_NUMBER() OVER (PARTITION BY email ORDER BY created_at DESC) as rn FROM users ) t WHERE rn > 1 ); console.log(✅ Fixed duplicate emails); }

async function fixConstraintViolations() { console.log("🔧 Fixing constraint violations...\n");

// Fix negative prices const fixedPrices = await prisma.$executeRawUPDATE products SET price = ABS(price) WHERE price < 0; console.log(✅ Fixed ${fixedPrices} negative prices);

// Fix negative stock const fixedStock = await prisma.$executeRawUPDATE products SET stock = 0 WHERE stock < 0; console.log(✅ Fixed ${fixedStock} negative stock values); }

Prevention: Add Missing Constraints -- Migration to add missing constraints

-- 1. Add foreign key constraints ALTER TABLE orders ADD CONSTRAINT fk_orders_user_id FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE;

ALTER TABLE order_items ADD CONSTRAINT fk_order_items_order_id FOREIGN KEY (order_id) REFERENCES orders(id) ON DELETE CASCADE;

-- 2. Add check constraints ALTER TABLE products ADD CONSTRAINT chk_products_price_positive CHECK (price >= 0);

ALTER TABLE products ADD CONSTRAINT chk_products_stock_non_negative CHECK (stock >= 0);

-- 3. Add unique constraints CREATE UNIQUE INDEX idx_users_email_unique ON users(LOWER(email));

-- 4. Add NOT NULL constraints ALTER TABLE users ALTER COLUMN email SET NOT NULL;

ALTER TABLE orders ALTER COLUMN total SET NOT NULL;

Automated Testing // tests/data-integrity.test.ts describe("Data Integrity", () => { it("should not allow orphaned orders", async () => { // Try to create order with non-existent user await expect( prisma.order.create({ data: { userId: 99999, // Non-existent total: 100, status: "pending", }, }) ).rejects.toThrow("Foreign key constraint"); });

it("should not allow negative prices", async () => { await expect( prisma.product.create({ data: { name: "Test", price: -10, // Invalid stock: 100, }, }) ).rejects.toThrow("Check constraint"); });

it("should not allow duplicate emails", async () => { await prisma.user.create({ data: { email: "test@example.com", name: "Test" }, });

await expect(
  prisma.user.create({
    data: { email: "test@example.com", name: "Test 2" },
  })
).rejects.toThrow("Unique constraint");

}); });

Monitoring Dashboard // Monitor data quality metrics async function getDataQualityMetrics() { return { orphanedOrders: await prisma.$queryRawSELECT COUNT(*) FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL, duplicateEmails: await prisma.$queryRawSELECT COUNT(*) FROM ( SELECT email FROM users GROUP BY email HAVING COUNT(*) > 1 ) t, invalidPrices: await prisma.$queryRawSELECT COUNT(*) FROM products WHERE price < 0, missingData: await prisma.$queryRawSELECT SUM(CASE WHEN email IS NULL THEN 1 ELSE 0 END) as users_no_email, SUM(CASE WHEN total IS NULL THEN 1 ELSE 0 END) as orders_no_total FROM users CROSS JOIN orders, }; }

Best Practices Add constraints: Prevent issues at database level Regular audits: Weekly integrity checks Automated fixes: Safe, reversible repairs Monitor metrics: Track data quality over time Test constraints: Ensure they work Soft deletes: Easier recovery Backup before fixes: Always Output Checklist Orphaned record detection Foreign key integrity checks Constraint violation detection Missing field checks Automated audit script Fix scripts (with dry-run) Prevention migrations (add constraints) Automated tests Monitoring dashboard Regular audit schedule

返回排行榜