#!/bin/bash
#
# Test script for PDF Text Extraction Daemon.
#
# Sends six requests to a running daemon (health check, API info, two
# extractions, two expected-failure cases) and prints each response so a
# human can eyeball the result. The script never aborts on a failed test:
# every test is always attempted.
#
# Optional environment overrides:
#   BASE_URL         Base URL of the daemon   (default: http://localhost:8000)
#   OUTPUT_DIR       Directory in which test 4 looks for the output file
#                    (default: /tmp)
#   CONNECT_TIMEOUT  Seconds curl waits for the TCP connection (default: 5)
#
# Requires curl. python3 is optional: without it responses are printed raw.

BASE_URL="${BASE_URL:-http://localhost:8000}"
OUTPUT_DIR="${OUTPUT_DIR:-/tmp}"
CONNECT_TIMEOUT="${CONNECT_TIMEOUT:-5}"
readonly SAMPLE_PDF_URL="https://www.pdf995.com/samples/pdf.pdf"

#######################################
# Perform a GET request and write the response body to stdout.
# Globals:   CONNECT_TIMEOUT (read)
# Arguments: $1 - full URL to request
# Outputs:   response body on stdout; curl errors on stderr (-S keeps
#            connection failures visible even though progress is silenced)
# Returns:   curl's exit status
#######################################
http_get() {
  curl -sS --connect-timeout "$CONNECT_TIMEOUT" "$1"
}

#######################################
# Pretty-print JSON read from stdin.
# Falls back to echoing the input unchanged when it is not valid JSON or
# python3 is unavailable, so an unexpected response is never hidden.
# Arguments: none (reads stdin)
# Outputs:   formatted JSON, or the raw input; nothing for empty input
# Returns:   0
#######################################
pretty_json() {
  local payload formatted
  payload=$(cat)
  [[ -n "$payload" ]] || return 0
  if formatted=$(printf '%s' "$payload" | python3 -m json.tool 2>/dev/null); then
    printf '%s\n' "$formatted"
  else
    printf '%s\n' "$payload"
  fi
}

#######################################
# Print a short summary of an /extract response.
# Arguments: $1 - response body
# Outputs:   five "✓ ..." lines when the body is JSON containing the keys
#            success, pages, file_size_kb, extraction_time_ms and message;
#            otherwise the lines of the body that mention those fields, or
#            the whole body when no line matches. Nothing for an empty body.
# Returns:   0
#######################################
summarize_extraction() {
  local response=$1
  local summary
  [[ -n "$response" ]] || return 0
  # All lines are built before anything is printed, so a missing key cannot
  # leave a half-written summary in front of the fallback output.
  if summary=$(printf '%s' "$response" | python3 -c '
import json
import sys

data = json.load(sys.stdin)
lines = [
    "✓ Success: {}".format(data["success"]),
    "✓ Pages: {}".format(data["pages"]),
    "✓ Size: {:.2f} KB".format(data["file_size_kb"]),
    "✓ Time: {:.2f}ms".format(data["extraction_time_ms"]),
    "✓ Message: {}".format(data["message"]),
]
print("\n".join(lines))
' 2>/dev/null); then
    printf '%s\n' "$summary"
  # -i is required: the JSON keys are lower-case (file_size_kb, ...).
  elif ! printf '%s\n' "$response" | grep -iE '(success|pages|size|time)'; then
    printf '%s\n' "$response"
  fi
}

#######################################
# Report whether a file exists and, if so, its size in KB.
# Arguments: $1 - path to check
# Outputs:   one "✓"/"✗" status line on stdout
# Returns:   0
#######################################
report_output_file() {
  local path=$1
  local size_kb
  if [[ -f "$path" ]]; then
    # Byte count from wc, not parsed from ls output.
    size_kb=$(wc -c <"$path" | awk '{printf "%.2f", $1 / 1024}')
    echo "✓ Output file created: ${size_kb} KB"
  else
    echo "✗ Output file not found"
  fi
}

#######################################
# Run the whole suite in order.
# Globals:   BASE_URL, OUTPUT_DIR, SAMPLE_PDF_URL (read)
# Arguments: none
# Outputs:   test banners and daemon responses on stdout
#######################################
main() {
  local result
  local output_file="${OUTPUT_DIR}/daemon_test.txt"

  echo "=========================================="
  echo "PDF Text Extraction Daemon - Test Suite"
  echo "=========================================="
  echo ""

  # Test 1: Health check
  echo "[TEST 1] Health Check"
  http_get "$BASE_URL/health" | pretty_json
  echo ""

  # Test 2: API Info
  echo "[TEST 2] API Info"
  http_get "$BASE_URL/" | pretty_json
  echo ""

  # Test 3: Extract from URL (basic)
  echo "[TEST 3] Extract PDF from URL (5 pages, ~423KB)"
  result=$(http_get "$BASE_URL/extract?url=${SAMPLE_PDF_URL}")
  summarize_extraction "$result"
  echo ""

  # Test 4: Extract with custom output file
  echo "[TEST 4] Extract PDF with custom output file"
  # Remove any leftover from a previous run so the existence check below
  # reflects this request only.
  rm -f -- "$output_file"
  http_get "$BASE_URL/extract?url=${SAMPLE_PDF_URL}&output_file=daemon_test.txt" \
    | pretty_json
  report_output_file "$output_file"
  echo ""

  # Test 5: Invalid URL (should fail)
  echo "[TEST 5] Invalid URL handling"
  http_get "$BASE_URL/extract?url=not-a-url" | pretty_json
  echo ""

  # Test 6: Non-existent URL (should fail)
  echo "[TEST 6] Non-existent PDF URL handling"
  http_get "$BASE_URL/extract?url=https://www.example.com/nonexistent.pdf" \
    | pretty_json
  echo ""

  echo "=========================================="
  echo "Test Suite Complete!"
  echo "=========================================="
}

main