-
Notifications
You must be signed in to change notification settings - Fork 35
Optimize upload_visit_images by offloading I/O while preserving thread safety and security
#714
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -4,6 +4,7 @@ | |||||||||||||||||||||||
| Issue #288: Field Officer Check-In System With Location Verification | ||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| from starlette.concurrency import run_in_threadpool | ||||||||||||||||||||||||
| from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, Response | ||||||||||||||||||||||||
| from sqlalchemy.orm import Session | ||||||||||||||||||||||||
| from sqlalchemy import func, case | ||||||||||||||||||||||||
|
|
@@ -34,6 +35,8 @@ | |||||||||||||||||||||||
| ) | ||||||||||||||||||||||||
| from backend.cache import visit_last_hash_cache, visit_stats_cache | ||||||||||||||||||||||||
| from backend.schemas import BlockchainVerificationResponse | ||||||||||||||||||||||||
| from backend.utils import process_uploaded_image | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| logger = logging.getLogger(__name__) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
@@ -302,6 +305,17 @@ async def upload_visit_images( | |||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| image_paths = [] | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| def save_images(images_data): | ||||||||||||||||||||||||
| saved_paths = [] | ||||||||||||||||||||||||
| for safe_filename, img_bytes in images_data: | ||||||||||||||||||||||||
| file_path = os.path.join(VISIT_IMAGES_DIR, safe_filename) | ||||||||||||||||||||||||
| with open(file_path, 'wb') as f: | ||||||||||||||||||||||||
| f.write(img_bytes) | ||||||||||||||||||||||||
| relative_path = os.path.join("data", "visit_images", safe_filename) | ||||||||||||||||||||||||
| saved_paths.append(relative_path) | ||||||||||||||||||||||||
| return saved_paths | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
Comment on lines
+308
to
+317
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Prevent orphaned files when the disk write or DB commit fails: line 376 writes the files to disk before the database commit, so if the commit raises, the saved images are left orphaned on disk. 💡 Suggested fix: + written_abs_paths = []
def save_images(images_data):
saved_paths = []
for safe_filename, img_bytes in images_data:
file_path = os.path.join(VISIT_IMAGES_DIR, safe_filename)
with open(file_path, 'wb') as f:
f.write(img_bytes)
+ written_abs_paths.append(file_path)
relative_path = os.path.join("data", "visit_images", safe_filename)
saved_paths.append(relative_path)
return saved_paths
@@
- image_paths = await run_in_threadpool(save_images, images_to_save)
+ try:
+ image_paths = await run_in_threadpool(save_images, images_to_save)
+ existing_images.extend(image_paths)
+ visit.visit_images = existing_images
+ visit.updated_at = datetime.now(timezone.utc)
+ db.commit()
+ except Exception:
+ db.rollback()
+ for p in written_abs_paths:
+ try:
+ os.remove(p)
+ except FileNotFoundError:
+ pass
+ raise
- # Keep SQLAlchemy operations in main async context
- existing_images.extend(image_paths)
- visit.visit_images = existing_images
- visit.updated_at = datetime.now(timezone.utc)
- db.commit()Also applies to: 375-383, 392-396 🤖 Prompt for AI Agents |
||||||||||||||||||||||||
| images_to_save = [] | ||||||||||||||||||||||||
| for idx, image in enumerate(images): | ||||||||||||||||||||||||
| # Validate content_type is present | ||||||||||||||||||||||||
| if not image.content_type: | ||||||||||||||||||||||||
|
|
@@ -310,45 +324,61 @@ async def upload_visit_images( | |||||||||||||||||||||||
| # Validate file type | ||||||||||||||||||||||||
| if not image.content_type.startswith('image/'): | ||||||||||||||||||||||||
| raise HTTPException(status_code=400, detail=f"File must be an image, got {image.content_type}") | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Validate filename is present | ||||||||||||||||||||||||
| if not image.filename: | ||||||||||||||||||||||||
| raise HTTPException(status_code=400, detail="File must have a filename") | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Validate extension | ||||||||||||||||||||||||
| extension = image.filename.split('.')[-1].lower() if '.' in image.filename else '' | ||||||||||||||||||||||||
| # Validate extension using existing allowlist to prevent unrestricted file upload | ||||||||||||||||||||||||
| if '.' not in image.filename: | ||||||||||||||||||||||||
| raise HTTPException(status_code=400, detail="File must have an extension") | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| extension = image.filename.split('.')[-1].lower() | ||||||||||||||||||||||||
| if extension not in ALLOWED_IMAGE_EXTENSIONS: | ||||||||||||||||||||||||
| raise HTTPException( | ||||||||||||||||||||||||
| status_code=400, | ||||||||||||||||||||||||
| detail=f"File extension '{extension}' not allowed. Allowed: {', '.join(ALLOWED_IMAGE_EXTENSIONS)}" | ||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Read and validate file size | ||||||||||||||||||||||||
| # Read and validate file size directly here since utils max size is 20MB, but route requires 10MB | ||||||||||||||||||||||||
| content = await image.read() | ||||||||||||||||||||||||
| if len(content) > MAX_UPLOAD_SIZE: | ||||||||||||||||||||||||
| raise HTTPException( | ||||||||||||||||||||||||
| status_code=400, | ||||||||||||||||||||||||
| detail=f"File {image.filename} exceeds maximum size of {MAX_UPLOAD_SIZE / 1024 / 1024:.1f} MB" | ||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Reset file pointer for the utility processing | ||||||||||||||||||||||||
| await image.seek(0) | ||||||||||||||||||||||||
|
RohanExploit marked this conversation as resolved.
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Offload processing to threadpool using central utility | ||||||||||||||||||||||||
| try: | ||||||||||||||||||||||||
| pil_img, image_bytes = await process_uploaded_image(image) | ||||||||||||||||||||||||
| # Ensure the saved extension matches the actual image format if possible, otherwise fallback to safe validated extension | ||||||||||||||||||||||||
| actual_ext = pil_img.format.lower() if pil_img and pil_img.format else extension | ||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. P2: Prompt for AI agents |
||||||||||||||||||||||||
| # Map some formats to standard extensions | ||||||||||||||||||||||||
| if actual_ext == 'jpeg': actual_ext = 'jpg' | ||||||||||||||||||||||||
| if actual_ext not in ALLOWED_IMAGE_EXTENSIONS: | ||||||||||||||||||||||||
| actual_ext = extension | ||||||||||||||||||||||||
|
Comment on lines
+356
to
+362
|
||||||||||||||||||||||||
| except HTTPException: | ||||||||||||||||||||||||
| raise | ||||||||||||||||||||||||
| except Exception as e: | ||||||||||||||||||||||||
| logger.error(f"Image processing failed: {e}") | ||||||||||||||||||||||||
| raise HTTPException(status_code=400, detail="Invalid image file.") | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Generate secure filename | ||||||||||||||||||||||||
| timestamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S') | ||||||||||||||||||||||||
| safe_filename = f"visit_{visit_id}_{timestamp}_{idx}.{extension}" | ||||||||||||||||||||||||
| file_path = os.path.join(VISIT_IMAGES_DIR, safe_filename) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Save file | ||||||||||||||||||||||||
| with open(file_path, 'wb') as f: | ||||||||||||||||||||||||
| f.write(content) | ||||||||||||||||||||||||
| safe_filename = f"visit_{visit_id}_{timestamp}_{idx}.{actual_ext}" | ||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. P2: Filename generation uses a second-level timestamp plus the loop index, which is not unique under concurrency. Two simultaneous uploads for the same visit within the same second can generate identical filenames and silently overwrite each other's files. Prompt for AI agents |
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
Comment on lines
370
to
372
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Filename generation can overwrite files under concurrency: line 370 uses a second-level timestamp plus the loop index, so concurrent uploads for the same visit can produce colliding filenames. 💡 Suggested fix: +from uuid import uuid4
@@
- safe_filename = f"visit_{visit_id}_{timestamp}_{idx}.{actual_ext}"
+ safe_filename = f"visit_{visit_id}_{timestamp}_{idx}_{uuid4().hex[:8]}.{actual_ext}"📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||
| # Store relative path | ||||||||||||||||||||||||
| relative_path = os.path.join("data", "visit_images", safe_filename) | ||||||||||||||||||||||||
| image_paths.append(relative_path) | ||||||||||||||||||||||||
| images_to_save.append((safe_filename, image_bytes)) | ||||||||||||||||||||||||
|
Comment on lines
369
to
+373
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Offload only file writing to threadpool | ||||||||||||||||||||||||
| image_paths = await run_in_threadpool(save_images, images_to_save) | ||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. P2: No cleanup of written files on failure. If the threadpool write or the subsequent database commit raises, files already written to disk are never removed and become orphaned. Prompt for AI agents |
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Update visit with image paths | ||||||||||||||||||||||||
| # Keep SQLAlchemy operations in main async context | ||||||||||||||||||||||||
| existing_images.extend(image_paths) | ||||||||||||||||||||||||
| visit.visit_images = existing_images | ||||||||||||||||||||||||
| visit.updated_at = datetime.now(timezone.utc) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| db.commit() | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| logger.info(f"Uploaded {len(images)} images for visit {visit_id}") | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
Uh oh!
There was an error while loading. Please reload this page.