60 changes: 45 additions & 15 deletions backend/routers/field_officer.py
@@ -4,6 +4,7 @@
Issue #288: Field Officer Check-In System With Location Verification
"""

from starlette.concurrency import run_in_threadpool
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, Response
from sqlalchemy.orm import Session
from sqlalchemy import func, case
@@ -34,6 +35,8 @@
)
from backend.cache import visit_last_hash_cache, visit_stats_cache
from backend.schemas import BlockchainVerificationResponse
from backend.utils import process_uploaded_image


logger = logging.getLogger(__name__)

@@ -302,6 +305,17 @@ async def upload_visit_images(

        image_paths = []

        def save_images(images_data):
            saved_paths = []
            for safe_filename, img_bytes in images_data:
                file_path = os.path.join(VISIT_IMAGES_DIR, safe_filename)
                with open(file_path, 'wb') as f:
                    f.write(img_bytes)
                relative_path = os.path.join("data", "visit_images", safe_filename)
                saved_paths.append(relative_path)
            return saved_paths
RohanExploit marked this conversation as resolved.

Comment on lines +308 to +317
⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Prevent orphaned files when disk write or DB commit fails.

Line 376 writes files before db.commit() on Line 382. If write partially fails or commit fails, uploaded files can be left on disk without DB references. Add rollback + file cleanup on failure paths.

💡 Suggested fix
+        written_abs_paths = []
         def save_images(images_data):
             saved_paths = []
             for safe_filename, img_bytes in images_data:
                 file_path = os.path.join(VISIT_IMAGES_DIR, safe_filename)
                 with open(file_path, 'wb') as f:
                     f.write(img_bytes)
+                written_abs_paths.append(file_path)
                 relative_path = os.path.join("data", "visit_images", safe_filename)
                 saved_paths.append(relative_path)
             return saved_paths
@@
-        image_paths = await run_in_threadpool(save_images, images_to_save)
+        try:
+            image_paths = await run_in_threadpool(save_images, images_to_save)
+            existing_images.extend(image_paths)
+            visit.visit_images = existing_images
+            visit.updated_at = datetime.now(timezone.utc)
+            db.commit()
+        except Exception:
+            db.rollback()
+            for p in written_abs_paths:
+                try:
+                    os.remove(p)
+                except FileNotFoundError:
+                    pass
+            raise
 
-        # Keep SQLAlchemy operations in main async context
-        existing_images.extend(image_paths)
-        visit.visit_images = existing_images
-        visit.updated_at = datetime.now(timezone.utc)
-        db.commit()

Also applies to: 375-383, 392-396

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@backend/routers/field_officer.py` around lines 308 - 317, The current
save_images function writes files to VISIT_IMAGES_DIR before db.commit(),
risking orphaned files if any file write or the subsequent DB commit fails;
update the flow to use a transactional and cleanup strategy: within a try/except
around the file writes + DB operations, collect saved_paths (or write to a
temporary directory/temporary filenames), perform the DB operations and call
db.commit(), and if any exception occurs call db.rollback() and delete any files
that were written (using the saved_paths or temp-to-final rollback move); ensure
the logic in save_images (and the surrounding handler that calls db.commit())
either moves temp files to their final paths only after a successful commit or
removes written files on failure so no orphaned files remain.
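
For reference, a minimal sketch of the temporary-directory variant described in that prompt — not the code in this PR. Uploads are written under a scratch directory, promoted only after the commit succeeds, and discarded otherwise. The names VISIT_IMAGES_DIR, images_to_save, existing_images, visit, db, and run_in_threadpool are assumed from the surrounding diff.

import os
import shutil
import tempfile

def _write_to_temp(images_data, temp_dir):
    # Plain blocking I/O, meant to run via run_in_threadpool.
    temp_paths = []
    for safe_filename, img_bytes in images_data:
        tmp_path = os.path.join(temp_dir, safe_filename)
        with open(tmp_path, 'wb') as f:
            f.write(img_bytes)
        temp_paths.append((safe_filename, tmp_path))
    return temp_paths

async def _save_images_then_commit(images_to_save, existing_images, visit, db):
    # Scratch dir on the same filesystem so the final move is a cheap rename.
    temp_dir = tempfile.mkdtemp(dir=VISIT_IMAGES_DIR)
    try:
        temp_paths = await run_in_threadpool(_write_to_temp, images_to_save, temp_dir)
        visit.visit_images = existing_images + [
            os.path.join("data", "visit_images", name) for name, _ in temp_paths
        ]
        db.commit()
        for name, tmp_path in temp_paths:
            # Promote files to their final location only after the commit succeeded.
            shutil.move(tmp_path, os.path.join(VISIT_IMAGES_DIR, name))
    except Exception:
        db.rollback()
        raise
    finally:
        # Anything not promoted (failed write or failed commit) is discarded here.
        shutil.rmtree(temp_dir, ignore_errors=True)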

        images_to_save = []
        for idx, image in enumerate(images):
            # Validate content_type is present
            if not image.content_type:
@@ -310,45 +324,61 @@ async def upload_visit_images(
            # Validate file type
            if not image.content_type.startswith('image/'):
                raise HTTPException(status_code=400, detail=f"File must be an image, got {image.content_type}")

            # Validate filename is present
            if not image.filename:
                raise HTTPException(status_code=400, detail="File must have a filename")

            # Validate extension
            extension = image.filename.split('.')[-1].lower() if '.' in image.filename else ''
            # Validate extension using existing allowlist to prevent unrestricted file upload
            if '.' not in image.filename:
                raise HTTPException(status_code=400, detail="File must have an extension")

            extension = image.filename.split('.')[-1].lower()
            if extension not in ALLOWED_IMAGE_EXTENSIONS:
                raise HTTPException(
                    status_code=400,
                    detail=f"File extension '{extension}' not allowed. Allowed: {', '.join(ALLOWED_IMAGE_EXTENSIONS)}"
                )

            # Read and validate file size
            # Read and validate file size directly here since utils max size is 20MB, but route requires 10MB
            content = await image.read()
            if len(content) > MAX_UPLOAD_SIZE:
                raise HTTPException(
                    status_code=400,
                    detail=f"File {image.filename} exceeds maximum size of {MAX_UPLOAD_SIZE / 1024 / 1024:.1f} MB"
                )

            # Reset file pointer for the utility processing
            await image.seek(0)
RohanExploit marked this conversation as resolved.

            # Offload processing to threadpool using central utility
            try:
                pil_img, image_bytes = await process_uploaded_image(image)
                # Ensure the saved extension matches the actual image format if possible, otherwise fallback to safe validated extension
                actual_ext = pil_img.format.lower() if pil_img and pil_img.format else extension
P2: actual_ext is derived from pil_img.format, but that value is always unset from process_uploaded_image, so extension checks silently fall back to the user-provided suffix and can save format/extension mismatches.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At backend/routers/field_officer.py, line 358:

<comment>`actual_ext` is derived from `pil_img.format`, but that value is always unset from `process_uploaded_image`, so extension checks silently fall back to the user-provided suffix and can save format/extension mismatches.</comment>

<file context>
@@ -310,45 +324,61 @@ async def upload_visit_images(
+            try:
+                pil_img, image_bytes = await process_uploaded_image(image)
+                # Ensure the saved extension matches the actual image format if possible, otherwise fallback to safe validated extension
+                actual_ext = pil_img.format.lower() if pil_img and pil_img.format else extension
+                # Map some formats to standard extensions
+                if actual_ext == 'jpeg': actual_ext = 'jpg'
</file context>
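
If that finding holds, one hedged option (a sketch, not this PR's code) is to sniff the format from the processed bytes with Pillow instead of trusting pil_img.format; ALLOWED_IMAGE_EXTENSIONS and the already-validated extension are assumed from the route above.

import io
from PIL import Image

def detect_extension(image_bytes: bytes, fallback: str) -> str:
    # Return an extension sniffed from the bytes, or the validated fallback on any failure.
    try:
        with Image.open(io.BytesIO(image_bytes)) as probe:
            fmt = (probe.format or '').lower()
    except Exception:
        return fallback
    if fmt == 'jpeg':
        fmt = 'jpg'
    return fmt if fmt in ALLOWED_IMAGE_EXTENSIONS else fallback

# Possible usage inside the loop above:
# actual_ext = detect_extension(image_bytes, extension)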

                # Map some formats to standard extensions
                if actual_ext == 'jpeg': actual_ext = 'jpg'
                if actual_ext not in ALLOWED_IMAGE_EXTENSIONS:
                    actual_ext = extension
            except HTTPException:
                raise
            except Exception as e:
                logger.error(f"Image processing failed: {e}")
                raise HTTPException(status_code=400, detail="Invalid image file.")

            # Generate secure filename
            timestamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
            safe_filename = f"visit_{visit_id}_{timestamp}_{idx}.{extension}"
            file_path = os.path.join(VISIT_IMAGES_DIR, safe_filename)

            # Save file
            with open(file_path, 'wb') as f:
                f.write(content)
            safe_filename = f"visit_{visit_id}_{timestamp}_{idx}.{actual_ext}"
P2: Filename generation uses second-level timestamp plus loop index, which is not unique under concurrency. Two simultaneous uploads for the same visit_id within the same second will produce identical filenames (e.g., both start at idx=0) and silently overwrite each other's files. Add a random or UUID suffix to guarantee uniqueness.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At backend/routers/field_officer.py, line 371:

<comment>Filename generation uses second-level timestamp plus loop index, which is not unique under concurrency. Two simultaneous uploads for the same `visit_id` within the same second will produce identical filenames (e.g., both start at `idx=0`) and silently overwrite each other's files. Add a random or UUID suffix to guarantee uniqueness.</comment>

<file context>
@@ -310,45 +324,61 @@ async def upload_visit_images(
-            # Save file
-            with open(file_path, 'wb') as f:
-                f.write(content)
+            safe_filename = f"visit_{visit_id}_{timestamp}_{idx}.{actual_ext}"
             
-            # Store relative path
</file context>


Comment on lines 370 to 372
⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Filename generation can overwrite files under concurrency.

Line 370 uses second-level timestamp plus idx; concurrent requests for the same visit_id in the same second can produce identical filenames and overwrite existing images. Add a random suffix (e.g., uuid4) to make names collision-resistant.

💡 Suggested fix
+from uuid import uuid4
@@
-            safe_filename = f"visit_{visit_id}_{timestamp}_{idx}.{actual_ext}"
+            safe_filename = f"visit_{visit_id}_{timestamp}_{idx}_{uuid4().hex[:8]}.{actual_ext}"
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
            timestamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
            safe_filename = f"visit_{visit_id}_{timestamp}_{idx}.{extension}"
            file_path = os.path.join(VISIT_IMAGES_DIR, safe_filename)
            # Save file
            with open(file_path, 'wb') as f:
                f.write(content)
            safe_filename = f"visit_{visit_id}_{timestamp}_{idx}.{actual_ext}"
            timestamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
            safe_filename = f"visit_{visit_id}_{timestamp}_{idx}_{uuid4().hex[:8]}.{actual_ext}"
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@backend/routers/field_officer.py` around lines 370 - 372, The filename
generation in the visit image handling (the block creating timestamp and
safe_filename) can collide under concurrent requests; modify the logic in the
same function that currently sets timestamp =
datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S') and safe_filename =
f"visit_{visit_id}_{timestamp}_{idx}.{actual_ext}" to append a random,
collision-resistant suffix (e.g., uuid4 hex) to the filename; add an import for
uuid and incorporate uuid.uuid4().hex (or a truncated form) into safe_filename
so names are unique even when timestamp and idx collide.
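
Beyond the uuid4 suffix suggested above, an additional guard (a sketch assuming POSIX-style open flags, not part of this PR) is to create the file with O_EXCL so a duplicate name fails loudly instead of silently overwriting an earlier upload.

import os

def write_exclusive(path: str, data: bytes) -> None:
    # O_CREAT | O_EXCL makes the create fail with FileExistsError if the path already exists.
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o644)
    with os.fdopen(fd, 'wb') as f:
        f.write(data)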

            # Store relative path
            relative_path = os.path.join("data", "visit_images", safe_filename)
            image_paths.append(relative_path)
            images_to_save.append((safe_filename, image_bytes))

        # Offload only file writing to threadpool
        image_paths = await run_in_threadpool(save_images, images_to_save)
P2: No cleanup of written files on failure. If save_images partially succeeds (e.g., disk-full mid-loop) or db.commit() raises, already-written files remain on disk with no corresponding DB record. Track written paths and delete them in an exception handler before re-raising.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At backend/routers/field_officer.py, line 376:

<comment>No cleanup of written files on failure. If `save_images` partially succeeds (e.g., disk-full mid-loop) or `db.commit()` raises, already-written files remain on disk with no corresponding DB record. Track written paths and delete them in an exception handler before re-raising.</comment>

<file context>
@@ -310,45 +324,61 @@ async def upload_visit_images(
+            images_to_save.append((safe_filename, image_bytes))
+
+        # Offload only file writing to threadpool
+        image_paths = await run_in_threadpool(save_images, images_to_save)
         
-        # Update visit with image paths
</file context>


        # Update visit with image paths
        # Keep SQLAlchemy operations in main async context
        existing_images.extend(image_paths)
        visit.visit_images = existing_images
        visit.updated_at = datetime.now(timezone.utc)

        db.commit()

        logger.info(f"Uploaded {len(images)} images for visit {visit_id}")