132 lines
4.2 KiB
Bash
Executable File
132 lines
4.2 KiB
Bash
Executable File
#!/bin/bash
#
# Comprehensive Test Suite for PDF Text Extraction Daemon
# Tests various PDF types from sample-files.com
#
# Usage:
#   ./this_script.sh                                  # daemon on localhost:8000
#   BASE_URL="http://host:port" ./this_script.sh      # daemon elsewhere
#
# Requires: bash, curl, python3 (python3 is used only to read JSON replies).

# Base address of the daemon under test. An already-set BASE_URL in the
# environment wins; otherwise the historical default is used.
BASE_URL="${BASE_URL:-http://localhost:8000}"

echo "=============================================="
echo "COMPREHENSIVE PDF EXTRACTOR TEST SUITE"
echo "=============================================="
echo ""

# Define test cases.
# Each entry is one '|'-separated record:
#   name|url|expected size|expected page count|description
# The size, page count and description are only echoed for the reader.
declare -a TESTS=(
  "basic-text|https://sample-files.com/downloads/documents/pdf/basic-text.pdf|72.9 KB|1 page|Simple text document"
  "image-doc|https://sample-files.com/downloads/documents/pdf/image-doc.pdf|7.97 MB|6 pages|Image-heavy PDF"
  "fillable-form|https://sample-files.com/downloads/documents/pdf/fillable-form.pdf|52.7 KB|2 pages|Interactive form"
  "dev-example|https://sample-files.com/downloads/documents/pdf/dev-example.pdf|690 KB|6 pages|Developer example"
)

# Running tallies, updated by every test below.
PASS=0
FAIL=0

#######################################
# Print one top-level field of a JSON object.
# Arguments:
#   $1 - JSON text
#   $2 - key to look up
#   $3 - text printed when the key is absent
#   $4 - optional: decimals to round a numeric value to
# Outputs:
#   The value as Python's print() renders it (JSON true/false appear as
#   True/False). Prints nothing when $1 is not a JSON object.
#######################################
json_field() {
  local json=$1 key=$2 default=$3 digits=${4:-}
  # stderr is silenced on purpose: an unparsable reply simply yields an
  # empty field, which the caller treats as a failed test.
  printf '%s' "$json" | python3 -c '
import json
import sys

key, default, digits = sys.argv[1:4]
value = json.load(sys.stdin).get(key, default)
if digits and isinstance(value, (int, float)):
    value = round(value, int(digits))
print(value)
' "$key" "$default" "$digits" 2>/dev/null
}

# Run every extraction test case against the daemon.
for TEST in "${TESTS[@]}"; do
  IFS='|' read -r NAME URL SIZE PAGES DESC <<< "$TEST"

  echo "----------------------------------------------"
  echo "Test: $NAME"
  echo "URL: $URL"
  echo "Expected: $SIZE, $PAGES ($DESC)"
  echo "----------------------------------------------"

  # NOTE: %N (nanoseconds) is a GNU date extension.
  START_TIME=$(date +%s%N)

  # Make API call. -G turns the --data-urlencode pair into a properly
  # escaped query string, so characters such as '&', '#' or spaces in the
  # PDF URL cannot corrupt the request.
  RESULT=$(curl -s -G --data-urlencode "url=$URL" "$BASE_URL/extract")

  END_TIME=$(date +%s%N)
  ELAPSED_MS=$(( (END_TIME - START_TIME) / 1000000 ))

  # Parse response
  SUCCESS=$(json_field "$RESULT" success False)
  EXTRACTED_PAGES=$(json_field "$RESULT" pages 0)
  FILE_SIZE=$(json_field "$RESULT" file_size_kb 0)
  EXTRACTION_TIME=$(json_field "$RESULT" extraction_time_ms 0 2)
  MESSAGE=$(json_field "$RESULT" message 'N/A')

  echo ""
  echo "Results:"
  echo " Status: $SUCCESS"
  echo " Pages extracted: $EXTRACTED_PAGES"
  echo " File size: ${FILE_SIZE} KB"
  echo " Extraction time: ${EXTRACTION_TIME}ms"
  echo " Total round-trip: ${ELAPSED_MS}ms"
  echo " Message: $MESSAGE"

  # Validate results: the reply must parse, report success, and carry a
  # pages field (EXTRACTED_PAGES is empty only when parsing failed).
  if [[ "$SUCCESS" == "True" && -n "$EXTRACTED_PAGES" ]]; then
    echo ""
    echo "✓ PASS"
    # Plain assignment: ((PASS++)) returns status 1 when PASS is 0.
    PASS=$((PASS + 1))
  else
    echo ""
    echo "✗ FAIL: $RESULT"
    FAIL=$((FAIL + 1))
  fi

  echo ""
done

# Test error handling
echo "=============================================="
echo "ERROR HANDLING TESTS"
echo "=============================================="
echo ""

#######################################
# Request extraction of a URL that should be rejected and check that the
# daemon's reply contains an expected piece of text.
# Globals:
#   BASE_URL (read), PASS / FAIL (incremented)
# Arguments:
#   $1 - test description shown after "Test: "
#   $2 - value sent as the 'url' query parameter
#   $3 - grep pattern the reply must contain
#   $4 - note printed on success
#   $5 - note printed on failure
#######################################
expect_error_response() {
  local description=$1 url=$2 pattern=$3 pass_note=$4 fail_note=$5
  local response

  echo "Test: $description"
  response=$(curl -s -G --data-urlencode "url=$url" "$BASE_URL/extract")

  if grep -q -- "$pattern" <<< "$response"; then
    echo "✓ PASS ($pass_note)"
    # Count successes too, so the Passed/Total figures in the summary
    # include these tests and not only their failures.
    PASS=$((PASS + 1))
  else
    echo "✗ FAIL ($fail_note)"
    FAIL=$((FAIL + 1))
  fi
  echo ""
}

# Invalid URL format
expect_error_response \
  "Invalid URL format (no http://)" \
  "not-a-url.pdf" \
  "must start with" \
  "Correctly rejected invalid URL" \
  "Should reject without http://"

# Non-existent URL
expect_error_response \
  "Non-existent PDF URL" \
  "https://example.com/nonexistent.pdf" \
  "404" \
  "Correctly returned 404" \
  "Should return 404"

# Test with output file parameter
echo "=============================================="
echo "OUTPUT FILE TEST"
echo "=============================================="
echo ""

#######################################
# Ask the daemon to write the extracted text to a named output file and
# check that the file shows up.
# NOTE(review): the check looks in /tmp on the machine running this
# script, so it presumably expects the daemon to run on the same host and
# to place output files in /tmp - confirm against the daemon.
# Globals:
#   BASE_URL (read), PASS / FAIL (incremented)
#######################################
run_output_file_test() {
  local pdf_url="https://sample-files.com/downloads/documents/pdf/basic-text.pdf"
  local output_name="test_output.txt"
  local output_path="/tmp/$output_name"
  local size_bytes

  echo "Test: Extract with custom output file"

  # Remove any leftover from a previous run; otherwise a stale file would
  # make this test pass even if the daemon wrote nothing.
  rm -f -- "$output_path"

  # The reply body is not inspected by this test, only the file on disk.
  curl -s -G \
    --data-urlencode "url=$pdf_url" \
    --data-urlencode "output_file=$output_name" \
    "$BASE_URL/extract" > /dev/null

  if [[ -f "$output_path" ]]; then
    # wc -c instead of parsing ls output; strip the padding some wc
    # implementations add.
    size_bytes=$(wc -c < "$output_path")
    echo "✓ PASS (Output file created)"
    echo " File size: ${size_bytes//[[:space:]]/} bytes"
    PASS=$((PASS + 1))
  else
    echo "✗ FAIL (Output file not found)"
    FAIL=$((FAIL + 1))
  fi
  echo ""
}

run_output_file_test

# Summary

#######################################
# Print the pass/fail tallies and the overall verdict.
# Globals:
#   PASS, FAIL (read)
# Returns:
#   0 when no test failed, 1 otherwise.
#######################################
print_summary() {
  local total=$((PASS + FAIL))

  echo "=============================================="
  echo "TEST SUMMARY"
  echo "=============================================="
  echo "Passed: $PASS"
  echo "Failed: $FAIL"
  echo "Total: $total"
  echo ""

  if (( FAIL == 0 )); then
    echo "✓ ALL TESTS PASSED!"
    return 0
  fi

  echo "✗ Some tests failed. Review output above."
  return 1
}

# Propagate failure through the exit status so callers (CI, make, other
# scripts) can detect a failed run without reading the output.
print_summary || exit 1