Neo Service Layer Migration Guide

Comprehensive migration guide for Neo Service Layer with automated tools, zero-downtime strategies, and enterprise-grade data migration patterns. Successfully migrate your infrastructure, data, and applications with confidence using our battle-tested procedures.

Migration Scenarios

Version Upgrades

Upgrade from older versions to v1.5.2 with automatic compatibility checks

Platform Migration

Migrate between cloud providers (AWS, Azure, GCP) or on-premise

Data Migration

Transfer terabytes of data with integrity verification and minimal downtime

Blockchain Migration

Migrate between Neo N3 and Neo X or upgrade smart contracts

Migration Features

  • Zero-downtime migrations using blue-green deployments
  • Automated rollback with instant recovery capability
  • Data integrity validation with cryptographic verification
  • Progressive migration with canary deployments
  • Real-time monitoring during migration process
  • Comprehensive logging for audit trails
  • Automated testing at each migration phase
  • Multi-region support for global deployments

Version Compatibility

Compatibility Matrix

| Current Version | Target Version | Direct Upgrade | Data Migration Required | Breaking Changes | Downtime Required |
|-----------------|----------------|----------------|-------------------------|------------------|-------------------|
| v1.4.x | v1.5.2 | ✓ Yes | ✗ No | ✗ None | ✗ No |
| v1.3.x | v1.5.2 | ✓ Yes | ⚠ Partial | ⚠ Minor | ✗ No |
| v1.2.x | v1.5.2 | ⚠ Via v1.4 | ✓ Yes | ✓ Major | ⚠ Optional |
| v1.0.x - v1.1.x | v1.5.2 | ✗ Multi-step | ✓ Yes | ✓ Major | ✓ Yes |
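
The matrix can be encoded as a small helper that computes the hop sequence for an installed version. A minimal sketch; the intermediate hops shown for v1.0.x/v1.1.x are an assumption, since the matrix only says "multi-step":

```python
# Sketch: derive an upgrade path to v1.5.2 from the compatibility matrix above.
# Hops for v1.0/v1.1 are assumed (via v1.2 and v1.4); verify against release notes.
UPGRADE_HOPS = {
    "1.4": ["1.5.2"],                 # direct upgrade
    "1.3": ["1.5.2"],                 # direct, partial data migration
    "1.2": ["1.4", "1.5.2"],          # via v1.4
    "1.1": ["1.2", "1.4", "1.5.2"],   # multi-step (assumed)
    "1.0": ["1.2", "1.4", "1.5.2"],   # multi-step (assumed)
}

def upgrade_path(current: str) -> list:
    """Return the ordered list of target versions to reach v1.5.2."""
    minor = ".".join(current.lstrip("v").split(".")[:2])
    try:
        return UPGRADE_HOPS[minor]
    except KeyError:
        raise ValueError(f"Unknown version: {current}")

print(upgrade_path("v1.2.7"))  # ['1.4', '1.5.2']
```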

Breaking Changes in v1.5.x

API Changes

  • BlockchainType parameter now required for all operations
  • Authentication header format changed from X-API-Key to Authorization: Bearer
  • Response format standardized with data wrapper
  • Error codes restructured with service prefixes
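
In practice, a v1.4 client migrating to v1.5 swaps its auth header and unwraps the new envelope. A hedged sketch based only on the changes listed above; the payload fields are illustrative placeholders, not from the API reference:

```python
# Old-style vs new-style request construction (v1.4 -> v1.5).
# Field names in `payload` are illustrative, not the documented API shape.

def build_headers_v14(api_key: str) -> dict:
    # v1.4.x authenticated with a custom header
    return {"X-API-Key": api_key}

def build_headers_v15(token: str) -> dict:
    # v1.5.x: Authorization: Bearer replaces X-API-Key
    return {"Authorization": f"Bearer {token}"}

def unwrap_v15(response_json: dict):
    # v1.5.x standardizes responses under a "data" wrapper
    return response_json["data"]

# BlockchainType is now required on every operation
payload = {"blockchainType": "NeoN3", "key": "user:42"}
print(unwrap_v15({"data": {"value": "0xabc"}}))  # {'value': '0xabc'}
```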

Database Schema Changes

  • New columns added for multi-blockchain support
  • Index restructuring for performance optimization
  • Partitioning implemented for large tables
  • Foreign key constraints added for data integrity

Configuration Changes

  • Environment variables renamed for consistency
  • YAML configuration format updated
  • New required security parameters
  • Service discovery mechanism changed
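
Renamed environment variables can be remapped mechanically before cutover. The old/new names below are hypothetical placeholders, not the actual v1.5.x renames; consult the release notes for the real mapping:

```python
# Hypothetical example of remapping renamed environment variables during upgrade.
# The names in RENAMED_VARS are placeholders for illustration only.
RENAMED_VARS = {
    "NEO_DB_CONN": "NEO_DATABASE_URL",
    "NEO_LOG": "NEO_LOG_LEVEL",
}

def migrate_env(env: dict) -> dict:
    """Return a copy of `env` with old variable names rewritten to new ones."""
    out = dict(env)
    for old, new in RENAMED_VARS.items():
        if old in out and new not in out:
            out[new] = out.pop(old)  # never clobber an explicitly set new name
    return out
```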

Migration Strategies

Blue-Green Deployment

Low Risk

Deploy new version alongside existing version and switch traffic atomically:

```bash
#!/bin/bash
# Blue-Green Migration Script

# Step 1: Deploy Green environment
echo "Deploying Green environment (v1.5.2)..."
kubectl apply -f deployment-green.yaml -n neo-green

# Step 2: Wait for Green to be ready
kubectl wait --for=condition=ready pod \
  -l app=neo-service,version=v1.5.2 -n neo-green --timeout=600s

# Step 3: Run smoke tests
echo "Running smoke tests on Green environment..."
./scripts/smoke-tests.sh https://green.neo-internal.com

# Step 4: Gradually shift traffic
echo "Starting traffic migration..."
for weight in 10 25 50 75 90 100; do
  echo "Shifting ${weight}% traffic to Green..."
  kubectl patch virtualservice neo-service -n istio-system --type merge -p '
  {
    "spec": {
      "http": [
        {
          "match": [{"headers": {"canary": {"exact": "true"}}}],
          "route": [{"destination": {"host": "neo-service-green", "port": {"number": 80}}}]
        },
        {
          "route": [
            {"destination": {"host": "neo-service-blue", "port": {"number": 80}}, "weight": '$((100-weight))'},
            {"destination": {"host": "neo-service-green", "port": {"number": 80}}, "weight": '$weight'}
          ]
        }
      ]
    }
  }'

  # Monitor error rates for 5 minutes before the next step
  sleep 300
  ERROR_RATE=$(curl -s 'http://prometheus:9090/api/v1/query?query=rate(http_requests_total{status=~"5.."}[5m])' \
    | jq -r '.data.result[0].value[1]')
  if (( $(echo "$ERROR_RATE > 0.01" | bc -l) )); then
    echo "Error rate too high, rolling back..."
    kubectl patch virtualservice neo-service -n istio-system --type merge -p '
    {"spec": {"http": [{"route": [{"destination": {"host": "neo-service-blue"}}]}]}}'
    exit 1
  fi
done

echo "Migration completed successfully!"

# Step 5: Cleanup Blue environment after validation period
echo "Schedule Blue environment cleanup in 24 hours..."
at now + 24 hours <
```

Advantages

  • Zero downtime deployment
  • Instant rollback capability
  • Easy validation of new version
  • No data migration during switch

Canary Deployment

Medium Risk

Gradually roll out changes to a subset of users:

```yaml
# Flagger Canary Configuration
apiVersion: flagger.app/v1beta1
kind: Canary
metadata:
  name: neo-service-canary
  namespace: neo-prod
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: neo-service
  # Progressive delivery configuration
  progressDeadlineSeconds: 3600
  service:
    port: 80
    targetPort: 8080
    gateways:
      - public-gateway.istio-system.svc.cluster.local
    hosts:
      - api.neoservicelayer.com
  analysis:
    # Canary analysis interval
    interval: 1m
    # Max number of failed checks
    threshold: 5
    # Max traffic percentage
    maxWeight: 50
    # Canary increment step
    stepWeight: 10
    # Metrics for canary analysis
    metrics:
      - name: request-success-rate
        thresholdRange:
          min: 99
        interval: 1m
      - name: request-duration
        thresholdRange:
          max: 500
        interval: 30s
      - name: error-rate
        templateRef:
          name: error-rate
          namespace: flagger-system
        thresholdRange:
          max: 1
        interval: 1m
    # Load testing during canary
    webhooks:
      - name: load-test
        type: rollout
        url: http://flagger-loadtester.test/
        timeout: 5s
        metadata:
          cmd: "hey -z 1m -q 10 -c 2 -H 'Authorization: Bearer test' https://api.neoservicelayer.com/"
      - name: acceptance-test
        type: pre-rollout
        url: http://flagger-loadtester.test/
        timeout: 30s
        metadata:
          type: bash
          cmd: "curl -s https://api.neoservicelayer.com/health | grep -q 'healthy'"
  # Alerts
  alerts:
    - name: "canary-rollout"
      severity: info
      providerRef:
        name: slack
        namespace: flagger-system
```
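
Conceptually, on each analysis interval Flagger compares the metrics against their thresholds (success rate at least 99%, duration at most 500ms, error rate at most 1%) and either advances the canary weight, holds, or rolls back once the failed-check threshold is hit. A simplified sketch of that decision, not Flagger's actual implementation:

```python
# Simplified model of the per-interval canary decision driven by the
# thresholds configured above. Illustrative, not Flagger's real logic.
def canary_decision(success_rate: float, duration_ms: float, error_rate: float,
                    failed_checks: int, threshold: int = 5) -> str:
    healthy = success_rate >= 99 and duration_ms <= 500 and error_rate <= 1
    if healthy:
        return "advance"      # step weight up by stepWeight (10%)
    if failed_checks + 1 >= threshold:
        return "rollback"     # failed-check budget exhausted
    return "hold"             # unhealthy sample; retry on next interval
```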

Rolling Update

Low Risk

Update instances one at a time with health checks:

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: neo-service
spec:
  replicas: 10
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 2          # Max pods above desired replicas
      maxUnavailable: 1    # Max pods unavailable during update
  template:
    spec:
      containers:
        - name: neo-service
          image: neo-service:v1.5.2
          # Readiness probe for safe rolling
          readinessProbe:
            httpGet:
              path: /health/ready
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 5
            successThreshold: 2
            failureThreshold: 3
          # Liveness probe
          livenessProbe:
            httpGet:
              path: /health/live
              port: 8080
            initialDelaySeconds: 60
            periodSeconds: 10
          # Graceful shutdown
          lifecycle:
            preStop:
              exec:
                command:
                  - /bin/sh
                  - -c
                  - |
                    # Signal service to stop accepting new requests
                    curl -X POST localhost:8080/admin/drain
                    # Wait for in-flight requests to complete
                    sleep 30
```
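
With `replicas: 10`, `maxSurge: 2`, and `maxUnavailable: 1`, the pod count during the rollout stays between 9 and 12. The bound calculation:

```python
def rolling_update_bounds(replicas: int, max_surge: int, max_unavailable: int):
    """Pod-count bounds Kubernetes enforces during a RollingUpdate:
    at least replicas - maxUnavailable ready, at most replicas + maxSurge total."""
    return replicas - max_unavailable, replicas + max_surge

lo, hi = rolling_update_bounds(10, 2, 1)
print(lo, hi)  # 9 12
```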

Migration Timeline

Typical Enterprise Migration Timeline

Complete migration process from planning to validation:

1. Planning & Assessment (1-2 weeks)

  • Inventory current infrastructure and dependencies
  • Analyze breaking changes and compatibility
  • Define success metrics and rollback criteria
  • Create detailed migration plan and runbooks

2. Environment Preparation (1 week)

  • Provision new infrastructure resources
  • Set up monitoring and alerting
  • Configure security policies and certificates
  • Install migration tools and scripts

3. Data Migration (2-5 days)

  • Create database backups and snapshots
  • Run schema migration scripts
  • Perform initial data sync
  • Set up continuous replication

4. Service Deployment (1-2 days)

  • Deploy new version in parallel
  • Configure service mesh and routing
  • Run integration tests
  • Validate service health

5. Traffic Migration (4-8 hours)

  • Start with canary traffic (5%)
  • Monitor metrics and error rates
  • Gradually increase traffic percentage
  • Complete traffic switchover

6. Validation & Cleanup (1 week)

  • Run comprehensive test suites
  • Verify data integrity
  • Monitor for stability
  • Decommission old infrastructure

Data Migration

Zero-Downtime Data Migration

Migration Flow

Old Database → CDC Replication → New Database
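
After the initial bulk copy, CDC change events must be applied to the new database idempotently and in order. A minimal sketch of that apply loop, using illustrative event dicts rather than the actual pgoutput wire format:

```python
# Sketch: idempotently apply ordered CDC events to the target store.
# Event shapes here are illustrative, not the pgoutput protocol.
def apply_event(table: dict, event: dict) -> None:
    op, row = event["op"], event["row"]
    if op in ("insert", "update"):
        table[row["id"]] = row       # upsert keyed by primary key
    elif op == "delete":
        table.pop(row["id"], None)   # delete is a no-op if already gone

target = {}
for ev in [
    {"op": "insert", "row": {"id": 1, "v": "a"}},
    {"op": "update", "row": {"id": 1, "v": "b"}},
    {"op": "delete", "row": {"id": 1}},
]:
    apply_event(target, ev)
print(target)  # {}
```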

Database Migration Tool

Automated database migration with schema versioning and data validation:

```python
#!/usr/bin/env python3
"""
Neo Service Layer Database Migration Tool
Handles schema migrations and data transfer with zero downtime
"""
import asyncio
import logging
import os
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List, Optional

import asyncpg
import click
import yaml


@dataclass
class MigrationConfig:
    source_db: str
    target_db: str
    batch_size: int = 10000
    parallel_workers: int = 4
    validate_data: bool = True
    use_cdc: bool = True


class DatabaseMigrator:
    def __init__(self, config: MigrationConfig):
        self.config = config
        self.source_pool = None
        self.target_pool = None
        self.logger = logging.getLogger(__name__)

    async def connect(self):
        """Establish connection pools"""
        self.source_pool = await asyncpg.create_pool(
            self.config.source_db, min_size=10, max_size=20
        )
        self.target_pool = await asyncpg.create_pool(
            self.config.target_db, min_size=10, max_size=20
        )

    async def migrate_schema(self):
        """Migrate database schema with version tracking"""
        self.logger.info("Starting schema migration...")
        async with self.source_pool.acquire() as source_conn:
            # Get current schema version
            current_version = await source_conn.fetchval(
                "SELECT version FROM schema_migrations ORDER BY version DESC LIMIT 1"
            )

        # Apply migration scripts newer than the current version
        migrations_path = "./migrations"
        for migration_file in sorted(os.listdir(migrations_path)):
            version = migration_file.split('_')[0]
            if version > current_version:
                self.logger.info(f"Applying migration {migration_file}")
                with open(f"{migrations_path}/{migration_file}", 'r') as f:
                    migration_sql = f.read()
                async with self.target_pool.acquire() as target_conn:
                    async with target_conn.transaction():
                        await target_conn.execute(migration_sql)
                        await target_conn.execute(
                            "INSERT INTO schema_migrations (version, applied_at) VALUES ($1, $2)",
                            version, datetime.utcnow()
                        )

    async def setup_cdc(self):
        """Set up Change Data Capture for real-time sync"""
        self.logger.info("Setting up CDC replication...")
        async with self.source_pool.acquire() as conn:
            # Create logical replication slot
            await conn.execute("""
                SELECT pg_create_logical_replication_slot('neo_migration_slot', 'pgoutput')
                WHERE NOT EXISTS (
                    SELECT 1 FROM pg_replication_slots
                    WHERE slot_name = 'neo_migration_slot'
                )
            """)
            # Create publication for all tables
            await conn.execute("""
                CREATE PUBLICATION neo_migration_pub FOR ALL TABLES
            """)

    async def migrate_table(self, table_name: str, where_clause: str = ""):
        """Migrate a single table with progress tracking"""
        self.logger.info(f"Migrating table: {table_name}")

        # Get total row count
        async with self.source_pool.acquire() as conn:
            total_rows = await conn.fetchval(
                f"SELECT COUNT(*) FROM {table_name} {where_clause}"
            )
        self.logger.info(f"Total rows to migrate: {total_rows:,}")

        # Migrate in batches
        offset = 0
        migrated = 0
        while offset < total_rows:
            # Fetch batch
            async with self.source_pool.acquire() as source_conn:
                query = f"""
                    SELECT * FROM {table_name} {where_clause}
                    ORDER BY id
                    LIMIT $1 OFFSET $2
                """
                rows = await source_conn.fetch(query, self.config.batch_size, offset)

            if not rows:
                break

            # Insert batch into target
            async with self.target_pool.acquire() as target_conn:
                # Build insert query dynamically
                columns = list(rows[0].keys())
                placeholders = ', '.join(f'${i+1}' for i in range(len(columns)))
                insert_query = f"""
                    INSERT INTO {table_name} ({', '.join(columns)})
                    VALUES ({placeholders})
                    ON CONFLICT (id) DO UPDATE SET
                    {', '.join(f'{col} = EXCLUDED.{col}' for col in columns if col != 'id')}
                """
                # Bulk insert
                async with target_conn.transaction():
                    await target_conn.executemany(
                        insert_query,
                        [tuple(row.values()) for row in rows]
                    )

            migrated += len(rows)
            offset += self.config.batch_size

            # Progress update
            progress = (migrated / total_rows) * 100
            self.logger.info(f"Progress: {progress:.1f}% ({migrated:,}/{total_rows:,})")

    async def validate_migration(self, table_name: str) -> bool:
        """Validate data integrity after migration"""
        self.logger.info(f"Validating table: {table_name}")

        # Compare row counts and checksums
        async with self.source_pool.acquire() as source_conn:
            source_count = await source_conn.fetchval(
                f"SELECT COUNT(*) FROM {table_name}"
            )
            source_checksum = await source_conn.fetchval(
                f"SELECT MD5(CAST(array_agg(t.* ORDER BY id) AS text)) FROM {table_name} t"
            )
        async with self.target_pool.acquire() as target_conn:
            target_count = await target_conn.fetchval(
                f"SELECT COUNT(*) FROM {table_name}"
            )
            target_checksum = await target_conn.fetchval(
                f"SELECT MD5(CAST(array_agg(t.* ORDER BY id) AS text)) FROM {table_name} t"
            )

        if source_count != target_count:
            self.logger.error(f"Row count mismatch: {source_count} vs {target_count}")
            return False
        if source_checksum != target_checksum:
            self.logger.error("Data checksum mismatch")
            return False

        self.logger.info(f"Validation passed: {source_count:,} rows")
        return True

    async def run_migration(self, tables: List[str]):
        """Run the complete migration process"""
        try:
            await self.connect()
            await self.migrate_schema()
            if self.config.use_cdc:
                await self.setup_cdc()

            # Migrate tables in parallel
            tasks = [asyncio.create_task(self.migrate_table(t)) for t in tables]
            await asyncio.gather(*tasks)

            # Validate if enabled
            if self.config.validate_data:
                for table in tables:
                    if not await self.validate_migration(table):
                        raise Exception(f"Validation failed for {table}")

            self.logger.info("Migration completed successfully!")
        finally:
            if self.source_pool:
                await self.source_pool.close()
            if self.target_pool:
                await self.target_pool.close()


@click.command()
@click.option('--config', '-c', required=True, help='Migration config file')
@click.option('--tables', '-t', multiple=True, help='Tables to migrate')
@click.option('--validate/--no-validate', default=True, help='Validate after migration')
def main(config: str, tables: tuple, validate: bool):
    """Neo Service Layer Database Migration Tool"""
    # Load configuration
    with open(config, 'r') as f:
        config_data = yaml.safe_load(f)

    migration_config = MigrationConfig(
        source_db=config_data['source_db'],
        target_db=config_data['target_db'],
        batch_size=config_data.get('batch_size', 10000),
        validate_data=validate
    )

    # Set up logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    # Run migration
    migrator = DatabaseMigrator(migration_config)
    asyncio.run(migrator.run_migration(list(tables)))


if __name__ == '__main__':
    main()
```

Migration Configuration

Example configuration for production migration:

```yaml
# migration-config.yaml
source_db: postgresql://user:pass@old-db:5432/neo_service_layer
target_db: postgresql://user:pass@new-db:5432/neo_service_layer_v2

# Batch processing
batch_size: 10000
parallel_workers: 8

# Data validation
validate_data: true
checksum_validation: true

# CDC Configuration
use_cdc: true
cdc_slot_name: neo_migration_slot

# Tables to migrate (in order)
tables:
  - name: users
    where_clause: ""
    priority: 1
  - name: storage_items
    where_clause: "WHERE created_at > '2024-01-01'"
    priority: 2
    partition_by: created_at
  - name: transactions
    where_clause: ""
    priority: 3
    batch_size: 50000  # Larger batches for this table
  - name: identity_credentials
    where_clause: ""
    priority: 4
    sensitive: true  # Extra encryption during transfer

# Schema migrations
schema:
  pre_migration:
    - file: 001_create_indexes.sql
    - file: 002_add_blockchain_columns.sql
  post_migration:
    - file: 003_update_constraints.sql
    - file: 004_rebuild_materialized_views.sql

# Monitoring
monitoring:
  metrics_endpoint: http://prometheus:9090
  alert_webhook: https://slack.webhook.url
  progress_interval: 60  # seconds

# Rollback configuration
rollback:
  enabled: true
  snapshot_before: true
  keep_snapshots: 3
```
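
Once parsed, the config should be sanity-checked before the migrator runs; in particular the `priority` fields determine table order. A minimal sketch assuming the YAML has already been loaded into a dict:

```python
# Sketch: validate a parsed migration config and return tables in priority order.
# The required keys mirror the example config above.
REQUIRED = {"source_db", "target_db", "tables"}

def validate_config(cfg: dict) -> list:
    """Raise on missing top-level keys; return table names in migration order."""
    missing = REQUIRED - cfg.keys()
    if missing:
        raise ValueError(f"Missing config keys: {sorted(missing)}")
    ordered = sorted(cfg["tables"], key=lambda t: t.get("priority", 0))
    return [t["name"] for t in ordered]
```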

Migration Tools

Automated Migration Toolkit

Neo Migration CLI

Command-line tool for automated migrations:

```shell
# Install Neo Migration CLI
npm install -g @neo-service-layer/migration-cli

# Initialize migration project
neo-migrate init --type version-upgrade --from v1.4.0 --to v1.5.2

# Analyze current environment
neo-migrate analyze --environment production
# Output:
# ✓ 22 services detected
# ✓ 3.2TB data to migrate
# ✓ 15 breaking changes identified
# ⚠ 3 manual interventions required

# Generate migration plan
neo-migrate plan --strategy blue-green --zero-downtime
# Creates: migration-plan-2025-01-30.yaml

# Validate migration plan
neo-migrate validate --plan migration-plan-2025-01-30.yaml

# Execute migration with monitoring
neo-migrate execute \
  --plan migration-plan-2025-01-30.yaml \
  --monitor \
  --rollback-on-error \
  --slack-webhook $SLACK_WEBHOOK

# Check migration status
neo-migrate status --detailed

# Run post-migration validation
neo-migrate validate --post-migration --comprehensive
```

Compatibility Checker

Automated compatibility analysis tool:

```javascript
// compatibility-checker.js
const { CompatibilityChecker } = require('@neo-service-layer/migration-tools');

async function checkCompatibility() {
  const checker = new CompatibilityChecker({
    currentVersion: 'v1.4.2',
    targetVersion: 'v1.5.2',
    environment: 'production'
  });

  // API Compatibility Check
  const apiReport = await checker.checkAPICompatibility({
    currentSwagger: './api/v1.4.2/swagger.json',
    targetSwagger: './api/v1.5.2/swagger.json'
  });

  console.log('API Compatibility Report:');
  console.log(`- Breaking changes: ${apiReport.breakingChanges.length}`);
  console.log(`- Deprecated endpoints: ${apiReport.deprecations.length}`);
  console.log(`- New required parameters: ${apiReport.newRequired.length}`);

  // Database Schema Compatibility
  const schemaReport = await checker.checkSchemaCompatibility({
    currentSchema: await getCurrentSchema(),
    targetSchema: await getTargetSchema()
  });

  console.log('\nSchema Compatibility Report:');
  console.log(`- New tables: ${schemaReport.newTables.length}`);
  console.log(`- Modified columns: ${schemaReport.modifiedColumns.length}`);
  console.log(`- Dropped constraints: ${schemaReport.droppedConstraints.length}`);

  // Configuration Compatibility
  const configReport = await checker.checkConfigCompatibility({
    currentConfig: './config/v1.4.2/production.yaml',
    targetConfig: './config/v1.5.2/production.yaml'
  });

  // Generate compatibility report
  await checker.generateReport({
    output: './migration-compatibility-report.html',
    format: 'html',
    includeRecommendations: true
  });

  // Exit with error if critical issues found
  if (apiReport.breakingChanges.length > 0 && !apiReport.migrationPath) {
    console.error('❌ Critical compatibility issues found!');
    process.exit(1);
  }

  console.log('✅ Compatibility check passed with warnings');
}

checkCompatibility().catch(console.error);
```
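
At its core, an API compatibility check is a set difference over the two specs' paths: an endpoint present in the old spec but missing from the new one is a breaking change. A simplified illustration (real Swagger diffing also compares methods, parameters, and response schemas):

```python
# Sketch: detect endpoints removed between two simplified API specs.
# Specs are reduced to {"paths": {"/route": {...}}} dicts for illustration.
def removed_endpoints(old_spec: dict, new_spec: dict) -> list:
    """Paths present in the old spec but absent from the new one."""
    return sorted(set(old_spec["paths"]) - set(new_spec["paths"]))

old = {"paths": {"/storage": {}, "/keys": {}, "/legacy/auth": {}}}
new = {"paths": {"/storage": {}, "/keys": {}}}
print(removed_endpoints(old, new))  # ['/legacy/auth']
```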

Data Validation Suite

Comprehensive data integrity validation:

```python
#!/usr/bin/env python3
"""
Data Validation Suite for Neo Service Layer Migration
Ensures data integrity during and after migration
"""
import asyncio
import sys
from datetime import datetime
from typing import Dict, List, Tuple

from deepdiff import DeepDiff


class DataValidator:
    def __init__(self, source_conn, target_conn):
        self.source = source_conn
        self.target = target_conn
        self.validation_results = []

    async def validate_table(self, table_name: str) -> Dict:
        """Comprehensive table validation"""
        results = {
            'table': table_name,
            'status': 'passed',
            'checks': []
        }

        # 1. Row count validation
        source_count = await self.source.fetchval(
            f"SELECT COUNT(*) FROM {table_name}"
        )
        target_count = await self.target.fetchval(
            f"SELECT COUNT(*) FROM {table_name}"
        )
        results['checks'].append({
            'check': 'row_count',
            'passed': source_count == target_count,
            'source': source_count,
            'target': target_count
        })

        # 2. Schema validation
        source_schema = await self.get_table_schema(self.source, table_name)
        target_schema = await self.get_table_schema(self.target, table_name)
        schema_diff = DeepDiff(source_schema, target_schema, ignore_order=True)
        results['checks'].append({
            'check': 'schema_match',
            'passed': not schema_diff,
            'differences': str(schema_diff) if schema_diff else None
        })

        # 3. Data checksum validation (sampling for large tables)
        if source_count > 1000000:
            # Sample validation for large tables
            sample_size = min(100000, int(source_count * 0.01))
            checksum_match = await self.validate_sample_checksum(
                table_name, sample_size
            )
        else:
            # Full validation for smaller tables
            checksum_match = await self.validate_full_checksum(table_name)
        results['checks'].append({
            'check': 'data_checksum',
            'passed': checksum_match,
            'method': 'sample' if source_count > 1000000 else 'full'
        })

        # 4. Constraint validation
        constraints_valid = await self.validate_constraints(table_name)
        results['checks'].append({
            'check': 'constraints',
            'passed': constraints_valid
        })

        # 5. Index validation
        indexes_valid = await self.validate_indexes(table_name)
        results['checks'].append({
            'check': 'indexes',
            'passed': indexes_valid
        })

        # Overall status
        results['status'] = 'passed' if all(
            check['passed'] for check in results['checks']
        ) else 'failed'
        return results

    async def validate_full_checksum(self, table_name: str) -> bool:
        """Calculate and compare full table checksums"""
        # Same ordered-aggregate checksum query against both databases
        query = f"""
            SELECT MD5(
                CAST(array_agg(row_to_json(t) ORDER BY t.id) AS text)
            ) as checksum
            FROM {table_name} t
        """
        source_checksum = await self.source.fetchval(query)
        target_checksum = await self.target.fetchval(query)
        return source_checksum == target_checksum

    async def validate_sample_checksum(
        self, table_name: str, sample_size: int
    ) -> bool:
        """Validate using statistical sampling"""
        # Random sample validation
        sample_query = f"""
            WITH sample AS (
                SELECT * FROM {table_name}
                ORDER BY RANDOM()
                LIMIT {sample_size}
            )
            SELECT MD5(
                CAST(array_agg(row_to_json(t) ORDER BY t.id) AS text)
            ) as checksum
            FROM sample t
        """
        # Run multiple samples
        mismatches = 0
        for i in range(5):
            source_checksum = await self.source.fetchval(sample_query)
            target_checksum = await self.target.fetchval(sample_query)
            if source_checksum != target_checksum:
                mismatches += 1
        # Allow up to 20% mismatch rate (1 of 5 samples) for sampling
        return mismatches <= 1

    async def generate_validation_report(self) -> str:
        """Generate comprehensive validation report"""
        report = []
        report.append("# Neo Service Layer Migration Validation Report")
        report.append(f"Generated: {datetime.now().isoformat()}")
        report.append("\n## Summary")

        total_tables = len(self.validation_results)
        passed_tables = sum(
            1 for r in self.validation_results if r['status'] == 'passed'
        )
        report.append(f"- Total tables validated: {total_tables}")
        report.append(f"- Passed: {passed_tables}")
        report.append(f"- Failed: {total_tables - passed_tables}")
        report.append(f"- Success rate: {(passed_tables/total_tables)*100:.1f}%")

        report.append("\n## Detailed Results")
        for result in self.validation_results:
            report.append(f"\n### Table: {result['table']}")
            report.append(f"Status: **{result['status'].upper()}**")
            report.append("\n| Check | Result | Details |")
            report.append("|-------|--------|---------|")
            for check in result['checks']:
                status = "✅ PASS" if check['passed'] else "❌ FAIL"
                details = self.format_check_details(check)
                report.append(f"| {check['check']} | {status} | {details} |")

        return "\n".join(report)

    def format_check_details(self, check: Dict) -> str:
        """Format check details for report"""
        if check['check'] == 'row_count':
            return f"Source: {check['source']:,}, Target: {check['target']:,}"
        elif check['check'] == 'schema_match' and check['differences']:
            return "Schema differences detected"
        elif check['check'] == 'data_checksum':
            return f"Method: {check['method']}"
        else:
            return "See detailed log"


# Usage example
async def main():
    validator = DataValidator(source_conn, target_conn)
    tables = ['users', 'storage_items', 'transactions', 'identity_credentials']

    for table in tables:
        result = await validator.validate_table(table)
        validator.validation_results.append(result)

    report = await validator.generate_validation_report()
    with open('migration-validation-report.md', 'w') as f:
        f.write(report)

    # Exit with appropriate code
    failed_tables = sum(
        1 for r in validator.validation_results if r['status'] == 'failed'
    )
    if failed_tables > 0:
        print(f"❌ Validation failed for {failed_tables} tables")
        sys.exit(1)
    else:
        print("✅ All validations passed!")
        sys.exit(0)
```
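
The sampling strategy trades certainty for speed. The chance that a uniform random sample of n rows contains at least one corrupted row, when a fraction p of all rows is corrupted, is 1 - (1 - p)^n, which is why even a 1% sample of a very large table catches widespread corruption reliably:

```python
def detection_probability(corrupt_fraction: float, sample_size: int) -> float:
    """P(at least one corrupted row lands in a uniform random sample of n rows)."""
    return 1 - (1 - corrupt_fraction) ** sample_size

# Even corruption touching only 0.01% of rows is almost certainly caught
# by a 100,000-row sample (probability ~0.99995).
p = detection_probability(0.0001, 100_000)
```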

Pre-Migration Checklist

Complete Pre-Migration Checklist

Ensure all items are completed before starting migration:

📋 Planning & Documentation

  • Migration plan reviewed and approved by all stakeholders
  • Rollback procedures documented and tested
  • Communication plan prepared for users and teams
  • Maintenance window scheduled and communicated

🔧 Technical Preparation

  • Full backup of production databases completed
  • Infrastructure resources provisioned and tested
  • Migration tools installed and configured
  • Monitoring and alerting systems configured
  • SSL certificates and security credentials prepared

✅ Validation & Testing

  • Dry run completed in staging environment
  • Performance benchmarks established
  • Automated test suite prepared and validated
  • Data validation scripts tested

👥 Team Readiness

  • Migration team roles and responsibilities assigned
  • On-call rotation scheduled for migration window
  • Emergency contacts list updated and distributed
  • War room or communication channel established

Rollback Procedures

Emergency Rollback Plan

Automated rollback procedures for different scenarios:

```bash
#!/bin/bash
# emergency-rollback.sh
# Automated rollback script for Neo Service Layer migration
set -e

# Configuration
ROLLBACK_REASON="${1:-Unspecified}"
ENVIRONMENT="${2:-production}"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
LOG_FILE="/var/log/neo-migration/rollback_${TIMESTAMP}.log"

# Logging function
log() {
  echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE"
}

# Send alerts
send_alert() {
  local message="$1"
  local severity="${2:-high}"

  # Slack notification
  curl -X POST "$SLACK_WEBHOOK" \
    -H 'Content-Type: application/json' \
    -d "{\"text\": \"🚨 ROLLBACK INITIATED: ${message}\"}"

  # PagerDuty alert
  curl -X POST https://events.pagerduty.com/v2/enqueue \
    -H 'Content-Type: application/json' \
    -d "{
      \"routing_key\": \"${PAGERDUTY_KEY}\",
      \"event_action\": \"trigger\",
      \"payload\": {
        \"summary\": \"Migration rollback: ${message}\",
        \"severity\": \"${severity}\",
        \"source\": \"neo-migration\",
        \"custom_details\": {
          \"environment\": \"${ENVIRONMENT}\",
          \"reason\": \"${ROLLBACK_REASON}\"
        }
      }
    }"
}

# Phase 1: Stop new traffic
rollback_phase1_stop_traffic() {
  log "Phase 1: Stopping new traffic to v1.5.2"

  # Update load balancer to stop sending traffic to new version
  kubectl patch service neo-service -n neo-prod -p '
  {
    "spec": {
      "selector": {
        "version": "v1.4.2"
      }
    }
  }'

  # Verify traffic switch
  sleep 10
  NEW_VERSION_TRAFFIC=$(curl -s 'http://prometheus:9090/api/v1/query?query=rate(http_requests_total{version="v1.5.2"}[1m])' \
    | jq -r '.data.result[0].value[1]')
  if (( $(echo "$NEW_VERSION_TRAFFIC > 0" | bc -l) )); then
    log "ERROR: Traffic still flowing to new version"
    return 1
  fi

  log "Phase 1 completed: Traffic stopped"
  return 0
}

# Phase 2: Restore database
rollback_phase2_restore_database() {
  log "Phase 2: Restoring database to pre-migration state"

  # Stop replication
  psql -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" -c "
    SELECT pg_drop_replication_slot('neo_migration_slot');
  "

  # Restore from snapshot
  if [[ -f "/backups/pre-migration-snapshot.sql" ]]; then
    log "Restoring from pre-migration snapshot"
    psql -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" < /backups/pre-migration-snapshot.sql
  else
    log "ERROR: Pre-migration snapshot not found"
    return 1
  fi

  # Verify database state
  CURRENT_VERSION=$(psql -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" -t -c "
    SELECT version FROM schema_migrations ORDER BY version DESC LIMIT 1;
  ")
  log "Database restored to version: $CURRENT_VERSION"
  return 0
}

# Phase 3: Restore services
rollback_phase3_restore_services() {
  log "Phase 3: Restoring services to v1.4.2"

  # Scale down new version
  kubectl scale deployment neo-service-v152 -n neo-prod --replicas=0

  # Scale up old version
  kubectl scale deployment neo-service-v142 -n neo-prod --replicas=10

  # Wait for pods to be ready
  kubectl wait --for=condition=ready pod \
    -l app=neo-service,version=v1.4.2 -n neo-prod --timeout=300s

  # Verify service health
  for i in {1..10}; do
    HEALTH_CHECK=$(curl -s https://api.neoservicelayer.com/health || echo "FAIL")
    if [[ "$HEALTH_CHECK" == *"healthy"* ]]; then
      log "Service health check passed"
      return 0
    fi
    sleep 10
  done

  log "ERROR: Service health check failed"
  return 1
}

# Phase 4: Cleanup
rollback_phase4_cleanup() {
  log "Phase 4: Cleanup and validation"

  # Remove new version deployments
  kubectl delete deployment neo-service-v152 -n neo-prod

  # Clear cache
  redis-cli -h "$REDIS_HOST" FLUSHALL

  # Run validation tests
  npm run test:production

  # Generate rollback report
  cat > /tmp/rollback-report.json <
```
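
A rollback script like the one above is typically invoked by an automated trigger. One reasonable trigger design, sketched below with illustrative threshold values, requires a sustained error-rate breach rather than reacting to a single spike:

```python
# Sketch: decide whether to trigger the emergency rollback based on a window
# of recent 5xx error-rate samples. Threshold and window are illustrative.
def should_rollback(error_rates: list, threshold: float = 0.01,
                    sustained: int = 3) -> bool:
    """Roll back only when the last `sustained` samples all exceed the
    threshold, so a single transient spike does not trigger a rollback."""
    recent = error_rates[-sustained:]
    return len(recent) == sustained and all(r > threshold for r in recent)
```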