pdf_tool/comprehensive_test.sh

#!/bin/bash
# Comprehensive Test Suite for PDF Text Extraction Daemon
# Tests various PDF types from sample-files.com

# Base URL of the extraction daemon. Defaults to the local development
# address; export BASE_URL before running to target another instance.
BASE_URL="${BASE_URL:-http://localhost:8000}"

echo "=============================================="
echo "COMPREHENSIVE PDF EXTRACTOR TEST SUITE"
echo "=============================================="
echo ""

# Test cases, one per array entry, as five '|'-separated fields:
#   name | PDF URL | expected size | expected page count | description
# Only the URL is sent to the daemon; the other fields are printed so the
# reader can compare them with the reported results.
declare -a TESTS=(
    "basic-text|https://sample-files.com/downloads/documents/pdf/basic-text.pdf|72.9 KB|1 page|Simple text document"
    "image-doc|https://sample-files.com/downloads/documents/pdf/image-doc.pdf|7.97 MB|6 pages|Image-heavy PDF"
    "fillable-form|https://sample-files.com/downloads/documents/pdf/fillable-form.pdf|52.7 KB|2 pages|Interactive form"
    "dev-example|https://sample-files.com/downloads/documents/pdf/dev-example.pdf|690 KB|6 pages|Developer example"
)

# Running pass/fail tallies reported in the final summary.
PASS=0
FAIL=0

#######################################
# Decodes the daemon's JSON reply and stores the fields used by the report.
# Arguments: $1 - raw response body
# Sets:      SUCCESS, EXTRACTED_PAGES, FILE_SIZE, EXTRACTION_TIME, MESSAGE
#            (all empty when the body is not a JSON object or python3 fails)
# One python3 process decodes the body once and prints one field per line;
# the message comes last so that it may span several lines.
#######################################
parse_extract_response() {
    {
        IFS= read -r SUCCESS
        IFS= read -r EXTRACTED_PAGES
        IFS= read -r FILE_SIZE
        IFS= read -r EXTRACTION_TIME
        MESSAGE=$(cat)
    } < <(printf '%s' "$1" | python3 -c '
import json
import sys

reply = json.load(sys.stdin)


def show(getter):
    # A field that cannot be rendered becomes an empty line so the
    # remaining fields still line up with the shell reads.
    try:
        print(getter())
    except Exception:
        print()


show(lambda: reply.get("success", False))
show(lambda: reply.get("pages", 0))
show(lambda: reply.get("file_size_kb", 0))
show(lambda: round(reply.get("extraction_time_ms", 0), 2))
show(lambda: reply.get("message", "N/A"))
' 2>/dev/null)
}

# Run every entry of TESTS against the /extract endpoint and tally the outcome.
for TEST in "${TESTS[@]}"; do
    IFS='|' read -r NAME URL SIZE PAGES DESC <<< "$TEST"

    echo "----------------------------------------------"
    echo "Test: $NAME"
    echo "URL: $URL"
    echo "Expected: $SIZE, $PAGES ($DESC)"
    echo "----------------------------------------------"

    # %N (nanoseconds) is a GNU date extension.
    START_TIME=$(date +%s%N)

    # -G turns the --data-urlencode pair into a query string, so characters
    # such as '&' or '?' inside the PDF URL cannot be mistaken for extra
    # parameters. -S keeps curl's own error message visible despite -s.
    CURL_STATUS=0
    RESULT=$(curl -sS -G --data-urlencode "url=$URL" "$BASE_URL/extract") || CURL_STATUS=$?

    END_TIME=$(date +%s%N)
    ELAPSED_MS=$(( (END_TIME - START_TIME) / 1000000 ))

    # Parse response
    parse_extract_response "$RESULT"

    echo ""
    echo "Results:"
    echo "  Status: $SUCCESS"
    echo "  Pages extracted: $EXTRACTED_PAGES"
    echo "  File size: ${FILE_SIZE} KB"
    echo "  Extraction time: ${EXTRACTION_TIME}ms"
    echo "  Total round-trip: ${ELAPSED_MS}ms"
    echo "  Message: $MESSAGE"

    # Validate results. "True" is how Python prints a JSON true.
    # NOTE(review): the expected page count ($PAGES) is only displayed, never
    # compared with the extracted value - confirm whether it should be.
    if [[ "$SUCCESS" == "True" && -n "$EXTRACTED_PAGES" ]]; then
        echo ""
        echo "✓ PASS"
        PASS=$((PASS + 1))
    else
        echo ""
        if (( CURL_STATUS != 0 )); then
            # No usable body: point at the transport failure instead of
            # printing an empty response.
            echo "✗ FAIL: curl exited with status $CURL_STATUS (is the daemon reachable at $BASE_URL?)"
        else
            echo "✗ FAIL: $RESULT"
        fi
        FAIL=$((FAIL + 1))
    fi

    echo ""
done

# Test error handling
echo "=============================================="
echo "ERROR HANDLING TESTS"
echo "=============================================="
echo ""

#######################################
# Records the outcome of an error-path request.
# Arguments: $1 - response body
#            $2 - literal text that must appear somewhere in the body
#            $3 - note printed on pass
#            $4 - note printed on fail
# Globals:   PASS or FAIL is incremented, so these tests are included in
#            the totals printed by the summary.
#######################################
expect_in_response() {
    if grep -qF -- "$2" <<< "$1"; then
        echo "✓ PASS ($3)"
        PASS=$((PASS + 1))
    else
        echo "✗ FAIL ($4)"
        FAIL=$((FAIL + 1))
    fi
}

# Invalid URL format
echo "Test: Invalid URL format (no http://)"
RESULT=$(curl -s "$BASE_URL/extract?url=not-a-url.pdf")
expect_in_response "$RESULT" "must start with" \
    "Correctly rejected invalid URL" "Should reject without http://"
echo ""

# Non-existent URL
echo "Test: Non-existent PDF URL"
RESULT=$(curl -s "$BASE_URL/extract?url=https://example.com/nonexistent.pdf")
# Substring match on the body: any occurrence of "404" counts.
expect_in_response "$RESULT" "404" \
    "Correctly returned 404" "Should return 404"
echo ""

# Test with output file parameter
echo "=============================================="
echo "OUTPUT FILE TEST"
echo "=============================================="
echo ""

echo "Test: Extract with custom output file"

# NOTE(review): this check assumes the daemon runs on this host and writes
# output_file under /tmp - confirm against the daemon's configuration.
OUTPUT_PATH=/tmp/test_output.txt

# Remove any leftover from an earlier run so a stale file cannot produce a
# false pass.
rm -f -- "$OUTPUT_PATH"

# Both parameters are URL-encoded and sent as a query string (-G). The body
# is captured but not inspected; only the file on disk is checked.
RESULT=$(curl -s -G \
    --data-urlencode "url=https://sample-files.com/downloads/documents/pdf/basic-text.pdf" \
    --data-urlencode "output_file=test_output.txt" \
    "$BASE_URL/extract")

if [[ -f "$OUTPUT_PATH" ]]; then
    echo "✓ PASS (Output file created)"
    # The arithmetic expansion strips the padding some wc builds add.
    echo "  File size: $(( $(wc -c < "$OUTPUT_PATH") )) bytes"
    PASS=$((PASS + 1))
else
    echo "✗ FAIL (Output file not found)"
    FAIL=$((FAIL + 1))
fi
echo ""

# Summary: print the tallies and turn them into the script's exit status.
echo "=============================================="
echo "TEST SUMMARY"
echo "=============================================="
echo "Passed: $PASS"
echo "Failed: $FAIL"
TOTAL=$((PASS + FAIL))
echo "Total:  $TOTAL"
echo ""

# Arithmetic test instead of an unquoted [ ... ], which errors out when the
# counter is empty.
if (( FAIL == 0 )); then
    echo "✓ ALL TESTS PASSED!"
else
    echo "✗ Some tests failed. Review output above."
    # Exit non-zero so callers (CI jobs, make targets, other scripts) can
    # detect the failure without parsing the output.
    exit 1
fi