#!/bin/bash
# Test script for PDF Text Extraction Daemon

# Base URL of the daemon under test. Defaults to the local instance; export
# BASE_URL before running the script to point the suite at another host/port.
BASE_URL="${BASE_URL:-http://localhost:8000}"

# Opening banner, followed by one blank separator line.
printf '%s\n' \
    "==========================================" \
    "PDF Text Extraction Daemon - Test Suite" \
    "==========================================" \
    ""

# Test 1: Health check
# The endpoint is requested exactly once and the body is kept in a variable, so
# the raw fallback shows the very response that failed to pretty-print instead
# of the result of a second request. -S makes curl report connection errors
# that plain -s would hide (e.g. when the daemon is not running).
echo "[TEST 1] Health Check"
if health_body=$(curl -sS "$BASE_URL/health"); then
    # Pretty-print when the body is valid JSON and python3 is available;
    # otherwise show the body verbatim.
    printf '%s\n' "$health_body" | python3 -m json.tool 2>/dev/null \
        || printf '%s\n' "$health_body"
else
    echo "✗ Request to $BASE_URL/health failed" >&2
fi
echo ""

# Test 2: API Info
# The root endpoint is requested exactly once; the captured body is reused for
# the raw fallback so no second request is made. -S makes curl report
# connection errors that plain -s would hide.
echo "[TEST 2] API Info"
if info_body=$(curl -sS "$BASE_URL/"); then
    # Pretty-print when the body is valid JSON and python3 is available;
    # otherwise show the body verbatim.
    printf '%s\n' "$info_body" | python3 -m json.tool 2>/dev/null \
        || printf '%s\n' "$info_body"
else
    echo "✗ Request to $BASE_URL/ failed" >&2
fi
echo ""

# Test 3: Extract from URL (basic)
# Requests an extraction and prints a short summary of the JSON response.
# RESULT keeps the raw response body so the fallback can reuse it.
echo "[TEST 3] Extract PDF from URL (5 pages, ~423KB)"
if RESULT=$(curl -sS "$BASE_URL/extract?url=https://www.pdf995.com/samples/pdf.pdf"); then
    # The summary is assembled completely before anything is printed, so a
    # missing key or non-JSON body produces no partial output ahead of the
    # fallback. The fallback greps case-insensitively because the response
    # keys read above are lowercase (file_size_kb, extraction_time_ms).
    printf '%s\n' "$RESULT" | python3 -c '
import sys, json
data = json.load(sys.stdin)
summary = [
    "✓ Success: {}".format(data["success"]),
    "✓ Pages: {}".format(data["pages"]),
    "✓ Size: {:.2f} KB".format(data["file_size_kb"]),
    "✓ Time: {:.2f}ms".format(data["extraction_time_ms"]),
    "✓ Message: {}".format(data["message"]),
]
print("\n".join(summary))
' 2>/dev/null || printf '%s\n' "$RESULT" | grep -iE "(success|pages|size|time)"
else
    echo "✗ Request to $BASE_URL/extract failed" >&2
fi
echo ""

# Test 4: Extract with custom output file
# Requests an extraction with output_file=daemon_test.txt and then checks that
# the file appeared. The check looks under /tmp, i.e. it assumes the daemon
# writes its output files there and runs on the same machine as this script.
echo "[TEST 4] Extract PDF with custom output file"
output_path="/tmp/daemon_test.txt"

# Remove any leftover from a previous run; otherwise a stale file would make
# the existence check below pass even if this request wrote nothing.
rm -f -- "$output_path"

if extract_body=$(curl -sS "$BASE_URL/extract?url=https://www.pdf995.com/samples/pdf.pdf&output_file=daemon_test.txt"); then
    # Pretty-print JSON; show a non-JSON body verbatim instead of discarding it.
    printf '%s\n' "$extract_body" | python3 -m json.tool 2>/dev/null \
        || printf '%s\n' "$extract_body"
else
    echo "✗ Request to $BASE_URL/extract failed" >&2
fi

if [[ -f "$output_path" ]]; then
    # Derive the size from the byte count rather than parsing `ls` columns,
    # so the number printed really is in KB.
    size_kb=$(wc -c < "$output_path" | awk '{ printf "%.1f", $1 / 1024 }')
    echo "✓ Output file created: ${size_kb} KB"
else
    echo "✗ Output file not found"
fi
echo ""

# Test 5: Invalid URL (should fail)
# Sends a value that is not a URL and displays whatever the daemon answers.
# The response is shown even when it is not JSON (e.g. a plain-text or HTML
# error page), since the error output is the point of this test.
echo "[TEST 5] Invalid URL handling"
if invalid_body=$(curl -sS "$BASE_URL/extract?url=not-a-url"); then
    printf '%s\n' "$invalid_body" | python3 -m json.tool 2>/dev/null \
        || printf '%s\n' "$invalid_body"
else
    # curl itself failed (e.g. connection refused); -S already printed why.
    echo "✗ Request to $BASE_URL/extract failed" >&2
fi
echo ""

# Test 6: Non-existent URL (should fail)
# Asks the daemon to extract from a URL that should not resolve to a PDF and
# displays whatever it answers. The response is shown even when it is not
# JSON, since the error output is the point of this test.
echo "[TEST 6] Non-existent PDF URL handling"
if missing_body=$(curl -sS "$BASE_URL/extract?url=https://www.example.com/nonexistent.pdf"); then
    printf '%s\n' "$missing_body" | python3 -m json.tool 2>/dev/null \
        || printf '%s\n' "$missing_body"
else
    # curl itself failed (e.g. connection refused); -S already printed why.
    echo "✗ Request to $BASE_URL/extract failed" >&2
fi
echo ""

# Closing banner: rule, completion message, rule.
printf '%s\n' \
    "==========================================" \
    "Test Suite Complete!" \
    "=========================================="