Skip to main content

Quick Diagnostics

# Check service health
curl http://localhost:40401/health

# Test WebSocket connection
websocat ws://localhost:40403/vendor/connect

# Check Docker services
docker-compose ps

# View service logs
docker-compose logs -f api-gateway

Common Issues

Service Won’t Start

Error: bind: address already in use
Solution:
# Find the process using the port and note its PID
lsof -i :40401
# Ask the process to exit cleanly first (SIGTERM) so it can release resources
kill <PID>
# Only if it is still running a few seconds later, force-kill it (SIGKILL)
kill -9 <PID>

# Or change port in docker-compose.yml
ports:
  - "50401:40401"  # Changed from 40401
Error: Cannot connect to the Docker daemon
Solution:
# Start Docker Desktop (macOS/Windows)
open -a Docker

# Start Docker service (Linux)
sudo systemctl start docker

# Verify Docker is running
docker version
Error: error returned from database: relation does not exist
Solution:
# Reset database and run migrations
# WARNING: `down -v` also removes the volumes, i.e. all existing database data.
docker-compose down -v
docker-compose up -d postgres

# Poll until Postgres accepts TCP connections instead of sleeping a fixed time:
# a fixed delay is too short on slow machines and needlessly long on fast ones.
# Gives up after ~30s; the migration below then fails with a clear error.
for attempt in {1..30}; do
  docker-compose exec -T postgres pg_isready -h 127.0.0.1 -U boop -d boop >/dev/null 2>&1 && break
  sleep 1
done

# Run migrations
sqlx migrate run --database-url "postgresql://boop:password@localhost:5432/boop"
Error: no space left on device or OOM errors
Solution:
# Clean up Docker
# WARNING: these commands are destructive. `-a` removes all unused images (not
# only dangling ones), `-f` skips the confirmation prompt, and `volume prune`
# deletes unused volumes together with the data stored in them.
docker system prune -af
docker volume prune -f

# Increase Docker resources (Docker Desktop)
# Settings > Resources > Increase Memory/Disk

# Check disk space (run before and after the cleanup to see what was reclaimed)
df -h

Authentication Failures

Issue: Authentication fails with NO_MATCH error
Diagnostic Steps:
  1. Verify same seed used for registration and auth
  2. Check palm side (left vs right)
  3. Verify user is registered
# Test with consistent seed
SEED="test-user-1"

# Register
# The seed is quoted so a value containing spaces or glob characters is still
# passed to the binary as one argument.
cargo run --bin mock-pvs -- --seed "$SEED" register
cargo run --bin mock-user -- --seed "$SEED" register "INTENT_LINK"

# Authenticate (must use the same seed as registration)
cargo run --bin mock-pvs -- --seed "$SEED" auth --side right
Issue: Authentication times out after 30 seconds
Solutions:
// Increase timeout in vendor code
const TIMEOUT = 60000;  // 60 seconds

// Implement retry logic
/**
 * Calls authenticate() and retries on timeout with a linearly growing delay
 * (1s, 2s, 3s, ...).
 *
 * @param {number} [maxRetries=3] Maximum number of attempts in total. Values
 *   below 1 (or NaN) are treated as 1 so that authenticate() is always tried
 *   at least once instead of silently resolving to undefined.
 * @returns {Promise<*>} Whatever authenticate() resolves to.
 * @throws The last error from authenticate(): immediately for any error whose
 *   code is not 'TIMEOUT', or once the attempts are exhausted.
 */
async function authWithRetry(maxRetries = 3) {
  const attempts = Math.max(1, Math.ceil(maxRetries) || 1);

  for (let attempt = 1; ; attempt++) {
    try {
      return await authenticate();
    } catch (err) {
      // Guard against null/undefined rejections before reading `.code`.
      const retryable = err != null && err.code === 'TIMEOUT' && attempt < attempts;
      if (!retryable) {
        throw err;
      }
      await wait(1000 * attempt);
    }
  }
}
Issue: Error: invalid socket address syntax
Solution:
# Use IP address format instead of hostname
# Wrong:
cargo run --bin mock-pvs -- serve localhost:8080

# Correct:
cargo run --bin mock-pvs -- serve 0.0.0.0:8080

WebSocket Issues

Issue: WebSocket disconnects randomly
Solutions:
/**
 * WebSocket wrapper that reconnects automatically with exponential backoff
 * (1s doubling up to 30s) and sends a JSON `{ type: 'ping' }` heartbeat every
 * 30 seconds while the socket is open.
 */
class RobustWebSocket {
  /**
   * @param {string} url WebSocket endpoint to connect to.
   */
  constructor(url) {
    this.url = url;
    this.reconnectDelay = 1000;      // current backoff in ms
    this.maxReconnectDelay = 30000;  // backoff ceiling in ms
    this.pingInterval = null;        // heartbeat timer handle
    this.reconnectTimer = null;      // pending reconnect timer handle
  }

  /** Opens a new socket and wires up the lifecycle handlers. */
  connect() {
    const socket = new WebSocket(this.url);
    this.ws = socket;

    socket.onclose = () => {
      // Ignore late events from a socket that has already been replaced.
      if (this.ws === socket) {
        this.reconnect();
      }
    };

    socket.onerror = (error) => {
      console.error('WebSocket error:', error);
      if (this.ws === socket) {
        this.reconnect();
      }
    };

    // Implement heartbeat
    socket.onopen = () => {
      this.reconnectDelay = 1000;
      this.startHeartbeat();
    };
  }

  /** Starts the periodic ping, replacing any heartbeat that is still running. */
  startHeartbeat() {
    clearInterval(this.pingInterval);
    this.pingInterval = setInterval(() => {
      if (this.ws.readyState === WebSocket.OPEN) {
        this.ws.send(JSON.stringify({ type: 'ping' }));
      }
    }, 30000);
  }

  /**
   * Stops the heartbeat and schedules one reconnect attempt.
   *
   * An `error` followed by a `close` on the same socket both end up here, so
   * the call is idempotent while a reconnect is pending. Without this guard
   * each drop would open two sockets and double the backoff twice.
   */
  reconnect() {
    clearInterval(this.pingInterval);
    this.pingInterval = null;

    if (this.reconnectTimer !== null) {
      return;
    }

    const delay = this.reconnectDelay;
    this.reconnectTimer = setTimeout(() => {
      this.reconnectTimer = null;
      console.log('Reconnecting...');
      this.connect();
    }, delay);

    this.reconnectDelay = Math.min(delay * 2, this.maxReconnectDelay);
  }
}
Issue: Vendor doesn’t receive auth results
Diagnostic:
// Debug aid: log every inbound frame raw, then decoded, and call out auth results
ws.onmessage = ({ data }) => {
  console.log('Raw message:', data);

  try {
    const parsed = JSON.parse(data);
    console.log('Parsed message:', parsed);

    const isAuthResult = parsed.type === 'auth_result';
    if (isAuthResult) {
      console.log('Auth result for context:', parsed.context_id);
    }
  } catch (err) {
    // Reached for invalid JSON and for any error raised while inspecting the payload
    console.error('Failed to parse message:', err);
  }
};
Issue: WebSocket blocked by CORS policy
Solution:
# Add to nginx config
location /ws {
  proxy_pass http://backend;
  proxy_http_version 1.1;
  proxy_set_header Upgrade $http_upgrade;
  proxy_set_header Connection "upgrade";
  proxy_set_header Host $host;
  proxy_set_header X-Real-IP $remote_addr;
  proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
  proxy_set_header X-Forwarded-Proto $scheme;
}

Database Issues

Error: too many connections for role
Solution:
# Adjust connection pool settings
environment:
  DATABASE_URL: postgresql://user:pass@localhost/db
  DB_MAX_CONNECTIONS: 100
  DB_POOL_SIZE: 20
  DB_POOL_TIMEOUT: 30
// In code
let pool = PgPoolOptions::new()
    .max_connections(20)
    .acquire_timeout(Duration::from_secs(3))
    .connect(&database_url)
    .await?;
Issue: Database queries taking too long
Diagnostic:
-- Find slow queries
SELECT
  query,
  calls,
  mean_exec_time,
  total_exec_time
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 10;

-- Check missing indexes
SELECT
  schemaname,
  tablename,
  attname,
  n_distinct,
  correlation
FROM pg_stats
WHERE schemaname = 'public'
  AND n_distinct > 100
  AND correlation < 0.1
ORDER BY n_distinct DESC;
Solution:
-- Add missing indexes
CREATE INDEX idx_users_palm_hash ON users(palm_hash);
CREATE INDEX idx_auth_logs_user_timestamp ON auth_logs(user_id, timestamp);
CREATE INDEX idx_attributes_user_name ON attributes(user_id, name);

Performance Issues

Diagnostic:
# Find CPU-intensive processes
top -o %CPU

# Check Docker container stats
docker stats --no-stream

# Profile specific service
docker exec -it api-gateway sh -c "top -n 1"
Solutions:
  • Scale horizontally with more replicas
  • Optimize algorithms (check for O(n²) operations)
  • Implement caching
  • Use connection pooling
Diagnostic:
# Monitor memory usage over time
while true; do
  docker stats --no-stream --format "table {{.MemUsage}}\t{{.Name}}"
  sleep 60
done

# Check for goroutine leaks (Go services)
# The URL is quoted because `?` is a glob character: unquoted, zsh aborts with
# "no matches found" and bash may expand it against files in the current directory.
curl "http://localhost:6060/debug/pprof/goroutine?debug=1"

# Check for memory leaks (Rust services)
valgrind --leak-check=full target/release/api-gateway
Solutions:
  • Fix unclosed connections
  • Clear caches periodically
  • Limit concurrent operations
  • Use weak references where appropriate
Diagnostic:
# Measure API response times
ab -n 1000 -c 10 http://localhost:40401/health

# Trace slow requests
curl -w "@curl-timing.txt" -o /dev/null -s http://localhost:40401/api/v1/auth

# curl-timing.txt:
time_namelookup:  %{time_namelookup}\n
time_connect:  %{time_connect}\n
time_appconnect:  %{time_appconnect}\n
time_pretransfer:  %{time_pretransfer}\n
time_redirect:  %{time_redirect}\n
time_starttransfer:  %{time_starttransfer}\n
time_total:  %{time_total}\n
Solutions:
  • Add caching layer (Redis)
  • Optimize database queries
  • Implement request queuing
  • Use CDN for static assets

Mock Service Issues

Issue: http://localhost:8081 shows nothing
Solutions:
# Verify service is running
ps aux | grep mock-pvs

# Check correct address format
cargo run --bin mock-pvs -- \
  --api-addr http://localhost:40401 \
  serve 0.0.0.0:8081  # Not localhost:8081

# Try different port
cargo run --bin mock-pvs -- \
  --api-addr http://localhost:40401 \
  serve 0.0.0.0:8082
Issue: Mock vendor running but not getting auth contexts
Solution:
# Configure PVS with vendor endpoint
# 1. Get configuration intent
cargo run --bin mock-pvs -- \
  --api-addr http://localhost:40401 \
  configuration

# 2. Configure with operator
cargo run --bin mock-operator -- \
  --pvs-backend-endpoint localhost:8080 \
  --api-addr http://localhost:40401 \
  pvs-configuration "INTENT_LINK"

# Verify configuration
curl http://localhost:40401/api/v1/pvs/config

Debugging Tools

Logging

# Enable debug logging
environment:
  RUST_LOG: debug
  RUST_BACKTRACE: full
  LOG_FORMAT: json

# Pretty print logs
# Plain `jq .` aborts on the first line that is not JSON (e.g. cargo's build
# output, which `2>&1` merges into the stream). -R reads every line as raw text,
# `fromjson?` pretty-prints the lines that are JSON, and `// .` passes any other
# line through unchanged (-r prints those without JSON string quoting).
cargo run 2>&1 | jq -rR 'fromjson? // .'

# Filter logs
docker-compose logs -f | grep ERROR

# Save logs for analysis
docker-compose logs > boop-logs.txt

Network Analysis

# Monitor WebSocket traffic
tcpdump -i any -w websocket.pcap port 40403

# Analyze with Wireshark
wireshark websocket.pcap

# Monitor HTTP traffic
mitmproxy -p 8888

# Test with curl through proxy
curl -x http://localhost:8888 http://localhost:40401/health

Performance Profiling

# CPU profiling (Rust)
cargo build --release
perf record -g target/release/api-gateway
perf report

# Memory profiling
valgrind --tool=massif target/release/api-gateway
ms_print massif.out.*

# Flame graph
cargo install flamegraph
cargo flamegraph --bin api-gateway

Health Checks

Service Health Endpoints

# Check all services
for port in 40401 40402 40403 40404 40405; do
  echo "Checking port $port:"
  # Capture the response first so an unreachable service is reported explicitly
  # instead of printing nothing; --max-time stops one hung service from
  # stalling the whole loop.
  if body=$(curl -s --max-time 5 "http://localhost:${port}/health"); then
    printf '%s\n' "$body" | jq .
  else
    echo "  no response on port $port" >&2
  fi
done

# Monitor continuously
watch -n 5 'curl -s http://localhost:40401/health | jq .'

Database Health

-- Check connection count
SELECT count(*) FROM pg_stat_activity;

-- Check database size
SELECT pg_database_size('boop');

-- Check table sizes
-- The regclass is built from the quoted, schema-qualified name (format with %I)
-- so tables with mixed-case or otherwise quoted names, or tables outside the
-- current search_path, resolve instead of raising "relation does not exist".
SELECT
  schemaname,
  tablename,
  pg_size_pretty(
    pg_total_relation_size(format('%I.%I', schemaname, tablename)::regclass)
  ) AS size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(format('%I.%I', schemaname, tablename)::regclass) DESC;

Recovery Procedures

Service Recovery

# Restart single service
docker-compose restart api-gateway

# Full restart
docker-compose down
docker-compose up -d

# Reset everything
docker-compose down -v
docker system prune -af
docker-compose build --no-cache
docker-compose up -d

Database Recovery

# Backup database
docker exec postgres pg_dump -U boop boop > backup.sql

# Restore database
docker exec -i postgres psql -U boop boop < backup.sql

# Reset and migrate
# WARNING: `down -v` deletes the database volume; take a backup first if the
# data matters.
docker-compose down -v
docker-compose up -d postgres
# Poll until Postgres accepts TCP connections rather than sleeping a fixed time
# (gives up after ~30s; the migration then fails with a clear error).
for attempt in {1..30}; do
  docker-compose exec -T postgres pg_isready -h 127.0.0.1 -U boop -d boop >/dev/null 2>&1 && break
  sleep 1
done
# Pass the connection string explicitly so the command does not depend on a
# DATABASE_URL variable being set in the current shell.
sqlx migrate run --database-url "postgresql://boop:password@localhost:5432/boop"

Getting Help