From dcbdcfec1634ff8af64464cbe39a814ca3fd3911 Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 18 Mar 2025 16:08:02 -0400 Subject: [PATCH 01/14] Preliminary work for ComfyUI native API integration. --- server/app_api.py | 438 ++++++++++++++++++ server/pipeline_api.py | 195 ++++++++ src/comfystream/client_api.py | 836 ++++++++++++++++++++++++++++++++++ src/comfystream/utils_api.py | 245 ++++++++++ 4 files changed, 1714 insertions(+) create mode 100644 server/app_api.py create mode 100644 server/pipeline_api.py create mode 100644 src/comfystream/client_api.py create mode 100644 src/comfystream/utils_api.py diff --git a/server/app_api.py b/server/app_api.py new file mode 100644 index 00000000..6158a540 --- /dev/null +++ b/server/app_api.py @@ -0,0 +1,438 @@ +import argparse +import asyncio +import json +import logging +import os +import sys + +import torch + +# Initialize CUDA before any other imports to prevent core dump. +if torch.cuda.is_available(): + torch.cuda.init() + + +from aiohttp import web +from aiortc import ( + MediaStreamTrack, + RTCConfiguration, + RTCIceServer, + RTCPeerConnection, + RTCSessionDescription, +) +from aiortc.codecs import h264 +from aiortc.rtcrtpsender import RTCRtpSender +from pipeline_api import Pipeline # TODO: Better integration (Are we replacing pipeline with pipeline_api?) +from twilio.rest import Client +from utils import patch_loop_datagram, add_prefix_to_app_routes, FPSMeter +from metrics import MetricsManager, StreamStatsManager +import time + +logger = logging.getLogger(__name__) +logging.getLogger("aiortc.rtcrtpsender").setLevel(logging.WARNING) +logging.getLogger("aiortc.rtcrtpreceiver").setLevel(logging.WARNING) + + +MAX_BITRATE = 2000000 +MIN_BITRATE = 2000000 + + +class VideoStreamTrack(MediaStreamTrack): + """video stream track that processes video frames using a pipeline. + + Attributes: + kind (str): The kind of media, which is "video" for this class. + track (MediaStreamTrack): The underlying media stream track. + pipeline (Pipeline): The processing pipeline to apply to each video frame. + """ + + kind = "video" + + def __init__(self, track: MediaStreamTrack, pipeline: Pipeline): + """Initialize the VideoStreamTrack. + + Args: + track: The underlying media stream track. + pipeline: The processing pipeline to apply to each video frame. + """ + super().__init__() + self.track = track + self.pipeline = pipeline + self.fps_meter = FPSMeter( + metrics_manager=app["metrics_manager"], track_id=track.id + ) + self.running = True + self.collect_task = asyncio.create_task(self.collect_frames()) + + # Add cleanup when track ends + @track.on("ended") + async def on_ended(): + logger.info("Source video track ended, stopping collection") + await cancel_collect_frames(self) + + async def collect_frames(self): + """Collect video frames from the underlying track and pass them to + the processing pipeline. Stops when track ends or connection closes. 
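+        Runs as a background task created in __init__; cancel_collect_frames
+        stops it once the source track ends.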
+ """ + try: + while self.running: + try: + frame = await self.track.recv() + await self.pipeline.put_video_frame(frame) + except asyncio.CancelledError: + logger.info("Frame collection cancelled") + break + except Exception as e: + if "MediaStreamError" in str(type(e)): + logger.info("Media stream ended") + else: + logger.error(f"Error collecting video frames: {str(e)}") + self.running = False + break + + # Perform cleanup outside the exception handler + logger.info("Video frame collection stopped") + except asyncio.CancelledError: + logger.info("Frame collection task cancelled") + except Exception as e: + logger.error(f"Unexpected error in frame collection: {str(e)}") + finally: + await self.pipeline.cleanup() + + async def recv(self): + """Receive a processed video frame from the pipeline, increment the frame + count for FPS calculation and return the processed frame to the client. + """ + processed_frame = await self.pipeline.get_processed_video_frame() + + # Increment the frame count to calculate FPS. + await self.fps_meter.increment_frame_count() + + return processed_frame + + +class AudioStreamTrack(MediaStreamTrack): + kind = "audio" + + def __init__(self, track: MediaStreamTrack, pipeline): + super().__init__() + self.track = track + self.pipeline = pipeline + self.running = True + self.collect_task = asyncio.create_task(self.collect_frames()) + + # Add cleanup when track ends + @track.on("ended") + async def on_ended(): + logger.info("Source audio track ended, stopping collection") + await cancel_collect_frames(self) + + async def collect_frames(self): + """Collect audio frames from the underlying track and pass them to + the processing pipeline. Stops when track ends or connection closes. + """ + try: + while self.running: + try: + frame = await self.track.recv() + await self.pipeline.put_audio_frame(frame) + except asyncio.CancelledError: + logger.info("Audio frame collection cancelled") + break + except Exception as e: + if "MediaStreamError" in str(type(e)): + logger.info("Media stream ended") + else: + logger.error(f"Error collecting audio frames: {str(e)}") + self.running = False + break + + # Perform cleanup outside the exception handler + logger.info("Audio frame collection stopped") + except asyncio.CancelledError: + logger.info("Frame collection task cancelled") + except Exception as e: + logger.error(f"Unexpected error in audio frame collection: {str(e)}") + finally: + await self.pipeline.cleanup() + + async def recv(self): + return await self.pipeline.get_processed_audio_frame() + + +def force_codec(pc, sender, forced_codec): + kind = forced_codec.split("/")[0] + codecs = RTCRtpSender.getCapabilities(kind).codecs + transceiver = next(t for t in pc.getTransceivers() if t.sender == sender) + codecPrefs = [codec for codec in codecs if codec.mimeType == forced_codec] + transceiver.setCodecPreferences(codecPrefs) + + +def get_twilio_token(): + account_sid = os.getenv("TWILIO_ACCOUNT_SID") + auth_token = os.getenv("TWILIO_AUTH_TOKEN") + + if account_sid is None or auth_token is None: + return None + + client = Client(account_sid, auth_token) + + token = client.tokens.create() + + return token + + +def get_ice_servers(): + ice_servers = [] + + token = get_twilio_token() + if token is not None: + # Use Twilio TURN servers + for server in token.ice_servers: + if server["url"].startswith("turn:"): + turn = RTCIceServer( + urls=[server["urls"]], + credential=server["credential"], + username=server["username"], + ) + ice_servers.append(turn) + + return ice_servers + + +async def 
offer(request): + pipeline = request.app["pipeline"] + pcs = request.app["pcs"] + + params = await request.json() + + await pipeline.set_prompts(params["prompts"]) + + offer_params = params["offer"] + offer = RTCSessionDescription(sdp=offer_params["sdp"], type=offer_params["type"]) + + ice_servers = get_ice_servers() + if len(ice_servers) > 0: + pc = RTCPeerConnection( + configuration=RTCConfiguration(iceServers=get_ice_servers()) + ) + else: + pc = RTCPeerConnection() + + pcs.add(pc) + + tracks = {"video": None, "audio": None} + + # Only add video transceiver if video is present in the offer + if "m=video" in offer.sdp: + # Prefer h264 + transceiver = pc.addTransceiver("video") + caps = RTCRtpSender.getCapabilities("video") + prefs = list(filter(lambda x: x.name == "H264", caps.codecs)) + transceiver.setCodecPreferences(prefs) + + # Monkey patch max and min bitrate to ensure constant bitrate + h264.MAX_BITRATE = MAX_BITRATE + h264.MIN_BITRATE = MIN_BITRATE + + # Handle control channel from client + @pc.on("datachannel") + def on_datachannel(channel): + if channel.label == "control": + + @channel.on("message") + async def on_message(message): + try: + params = json.loads(message) + + if params.get("type") == "get_nodes": + nodes_info = await pipeline.get_nodes_info() + response = {"type": "nodes_info", "nodes": nodes_info} + channel.send(json.dumps(response)) + elif params.get("type") == "update_prompts": + if "prompts" not in params: + logger.warning( + "[Control] Missing prompt in update_prompt message" + ) + return + await pipeline.update_prompts(params["prompts"]) + response = {"type": "prompts_updated", "success": True} + channel.send(json.dumps(response)) + else: + logger.warning( + "[Server] Invalid message format - missing required fields" + ) + except json.JSONDecodeError: + logger.error("[Server] Invalid JSON received") + except Exception as e: + logger.error(f"[Server] Error processing message: {str(e)}") + + @pc.on("track") + def on_track(track): + logger.info(f"Track received: {track.kind}") + if track.kind == "video": + videoTrack = VideoStreamTrack(track, pipeline) + tracks["video"] = videoTrack + sender = pc.addTrack(videoTrack) + + # Store video track in app for stats. 
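+            # Keyed by the incoming track id so the /stream/{stream_id}/stats
+            # route can report per-stream metrics.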
+ stream_id = track.id + request.app["video_tracks"][stream_id] = videoTrack + + codec = "video/H264" + force_codec(pc, sender, codec) + elif track.kind == "audio": + audioTrack = AudioStreamTrack(track, pipeline) + tracks["audio"] = audioTrack + pc.addTrack(audioTrack) + + @track.on("ended") + async def on_ended(): + logger.info(f"{track.kind} track ended") + request.app["video_tracks"].pop(track.id, None) + + @pc.on("connectionstatechange") + async def on_connectionstatechange(): + logger.info(f"Connection state is: {pc.connectionState}") + if pc.connectionState == "failed": + await pc.close() + pcs.discard(pc) + elif pc.connectionState == "closed": + await pc.close() + pcs.discard(pc) + + await pc.setRemoteDescription(offer) + + if "m=audio" in pc.remoteDescription.sdp: + await pipeline.warm_audio() + if "m=video" in pc.remoteDescription.sdp: + await pipeline.warm_video() + + answer = await pc.createAnswer() + await pc.setLocalDescription(answer) + + return web.Response( + content_type="application/json", + text=json.dumps( + {"sdp": pc.localDescription.sdp, "type": pc.localDescription.type} + ), + ) + +async def cancel_collect_frames(track): + track.running = False + if hasattr(track, 'collect_task') is not None and not track.collect_task.done(): + try: + track.collect_task.cancel() + await track.collect_task + except (asyncio.CancelledError): + pass + +async def set_prompt(request): + pipeline = request.app["pipeline"] + + prompt = await request.json() + await pipeline.set_prompts(prompt) + + return web.Response(content_type="application/json", text="OK") + + +def health(_): + return web.Response(content_type="application/json", text="OK") + + +async def on_startup(app: web.Application): + if app["media_ports"]: + patch_loop_datagram(app["media_ports"]) + + app["pipeline"] = Pipeline( + cwd=app["workspace"], disable_cuda_malloc=True, gpu_only=True, preview_method='none' + ) + app["pcs"] = set() + app["video_tracks"] = {} + + +async def on_shutdown(app: web.Application): + pcs = app["pcs"] + coros = [pc.close() for pc in pcs] + await asyncio.gather(*coros) + pcs.clear() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run comfystream server") + parser.add_argument("--port", default=8889, help="Set the signaling port") + parser.add_argument( + "--media-ports", default=None, help="Set the UDP ports for WebRTC media" + ) + parser.add_argument("--host", default="127.0.0.1", help="Set the host") + parser.add_argument( + "--workspace", default=None, required=True, help="Set Comfy workspace" + ) + parser.add_argument( + "--log-level", + default="INFO", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + help="Set the logging level", + ) + parser.add_argument( + "--monitor", + default=False, + action="store_true", + help="Start a Prometheus metrics endpoint for monitoring.", + ) + parser.add_argument( + "--stream-id-label", + default=False, + action="store_true", + help="Include stream ID as a label in Prometheus metrics.", + ) + args = parser.parse_args() + + logging.basicConfig( + level=args.log_level.upper(), + format="%(asctime)s [%(levelname)s] %(message)s", + datefmt="%H:%M:%S", + ) + + app = web.Application() + app["media_ports"] = args.media_ports.split(",") if args.media_ports else None + app["workspace"] = args.workspace + + app.on_startup.append(on_startup) + app.on_shutdown.append(on_shutdown) + + app.router.add_get("/", health) + app.router.add_get("/health", health) + + # WebRTC signalling and control routes. 
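+    # /offer negotiates the WebRTC session and applies the initial prompts;
+    # /prompt sets new prompts on the running pipeline.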
+ app.router.add_post("/offer", offer) + app.router.add_post("/prompt", set_prompt) + + # Add routes for getting stream statistics. + stream_stats_manager = StreamStatsManager(app) + app.router.add_get( + "/streams/stats", stream_stats_manager.collect_all_stream_metrics + ) + app.router.add_get( + "/stream/{stream_id}/stats", stream_stats_manager.collect_stream_metrics_by_id + ) + + # Add Prometheus metrics endpoint. + app["metrics_manager"] = MetricsManager(include_stream_id=args.stream_id_label) + if args.monitor: + app["metrics_manager"].enable() + logger.info( + f"Monitoring enabled - Prometheus metrics available at: " + f"http://{args.host}:{args.port}/metrics" + ) + app.router.add_get("/metrics", app["metrics_manager"].metrics_handler) + + # Add hosted platform route prefix. + # NOTE: This ensures that the local and hosted experiences have consistent routes. + add_prefix_to_app_routes(app, "/live") + + def force_print(*args, **kwargs): + print(*args, **kwargs, flush=True) + sys.stdout.flush() + + web.run_app(app, host=args.host, port=int(args.port), print=force_print) diff --git a/server/pipeline_api.py b/server/pipeline_api.py new file mode 100644 index 00000000..0f04e773 --- /dev/null +++ b/server/pipeline_api.py @@ -0,0 +1,195 @@ +import av +import torch +import numpy as np +import asyncio +import logging +import time +from PIL import Image +from io import BytesIO + +from typing import Any, Dict, Union, List +from comfystream.client_api import ComfyStreamClient +from comfystream import tensor_cache + +WARMUP_RUNS = 5 +logger = logging.getLogger(__name__) + + +class Pipeline: + def __init__(self, **kwargs): + self.client = ComfyStreamClient(**kwargs) + self.video_incoming_frames = asyncio.Queue() + self.audio_incoming_frames = asyncio.Queue() + + self.processed_audio_buffer = np.array([], dtype=np.int16) + + async def warm_video(self): + """Warm up the video pipeline with dummy frames""" + logger.info("Warming up video pipeline...") + + # Create a properly formatted dummy frame (random color pattern) + # Using standard tensor shape: BCHW [1, 3, 512, 512] + tensor = torch.rand(1, 3, 512, 512) # Random values in [0,1] + + # Create a dummy frame and attach the tensor as side_data + dummy_frame = av.VideoFrame(width=512, height=512, format="rgb24") + dummy_frame.side_data.input = tensor + + # Process a few frames for warmup + for i in range(WARMUP_RUNS): + logger.info(f"Video warmup iteration {i+1}/{WARMUP_RUNS}") + self.client.put_video_input(dummy_frame) + await self.client.get_video_output() + + logger.info("Video pipeline warmup complete") + + async def warm_audio(self): + dummy_frame = av.AudioFrame() + dummy_frame.side_data.input = np.random.randint(-32768, 32767, int(48000 * 0.5), dtype=np.int16) # TODO: adds a lot of delay if it doesn't match the buffer size, is warmup needed? 
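+        # 0.5 s of random mono int16 samples at 48 kHz, matching the format
+        # produced by audio_preprocess.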
+ dummy_frame.sample_rate = 48000 + + for _ in range(WARMUP_RUNS): + self.client.put_audio_input(dummy_frame) + await self.client.get_audio_output() + + async def set_prompts(self, prompts: Union[Dict[Any, Any], List[Dict[Any, Any]]]): + if isinstance(prompts, list): + await self.client.set_prompts(prompts) + else: + await self.client.set_prompts([prompts]) + + async def update_prompts(self, prompts: Union[Dict[Any, Any], List[Dict[Any, Any]]]): + if isinstance(prompts, list): + await self.client.update_prompts(prompts) + else: + await self.client.update_prompts([prompts]) + + async def put_video_frame(self, frame: av.VideoFrame): + frame.side_data.input = self.video_preprocess(frame) + frame.side_data.skipped = False # Different from LoadTensor, we don't skip frames here + self.client.put_video_input(frame) + await self.video_incoming_frames.put(frame) + + async def put_audio_frame(self, frame: av.AudioFrame): + frame.side_data.input = self.audio_preprocess(frame) + frame.side_data.skipped = False + self.client.put_audio_input(frame) + await self.audio_incoming_frames.put(frame) + + def video_preprocess(self, frame: av.VideoFrame) -> Union[torch.Tensor, np.ndarray]: + """Convert input video frame to tensor in consistent BCHW format""" + try: + frame_np = frame.to_ndarray(format="rgb24") + frame_np = frame_np.astype(np.float32) / 255.0 + tensor = torch.from_numpy(frame_np) + + # TODO: Necessary? + if len(tensor.shape) == 3 and tensor.shape[2] == 3: # HWC format + tensor = tensor.permute(2, 0, 1).unsqueeze(0) # -> BCHW + + # Ensure values are in range [0,1] + if tensor.min() < 0 or tensor.max() > 1: + logger.warning(f"Clamping preprocessing tensor: min={tensor.min().item()}, max={tensor.max().item()}") + tensor = torch.clamp(tensor, 0, 1) + + return tensor + + except Exception as e: + logger.error(f"Error in video_preprocess: {e}") + # Return a default tensor in case of error + return torch.zeros(1, 3, frame.height, frame.width) + + def audio_preprocess(self, frame: av.AudioFrame) -> Union[torch.Tensor, np.ndarray]: + return frame.to_ndarray().ravel().reshape(-1, 2).mean(axis=1).astype(np.int16) + + def video_postprocess(self, output: Union[torch.Tensor, np.ndarray]) -> av.VideoFrame: + """Convert tensor to VideoFrame format""" + try: + # Ensure output is a tensor + if isinstance(output, np.ndarray): + output = torch.from_numpy(output) + + # Convert from BCHW to HWC format for video frame + if len(output.shape) == 4: # BCHW format + output = output.squeeze(0) # Remove batch dimension + if output.shape[0] == 3: # CHW format + output = output.permute(1, 2, 0) # Convert to HWC + + # Convert to numpy array in correct format for VideoFrame + frame_np = (output * 255.0).clamp(0, 255).to(dtype=torch.uint8).cpu().numpy() + + # Create VideoFrame with RGB format + video_frame = av.VideoFrame.from_ndarray(frame_np, format='rgb24') + + logger.info(f"Created video frame with shape: {frame_np.shape}") + return video_frame + + except Exception as e: + logger.error(f"Error in video_postprocess: {str(e)}") + # Return a black frame as fallback + return av.VideoFrame(width=512, height=512, format='rgb24') + + def audio_postprocess(self, output: Union[torch.Tensor, np.ndarray]) -> av.AudioFrame: + return av.AudioFrame.from_ndarray(np.repeat(output, 2).reshape(1, -1)) + + async def get_processed_video_frame(self): + """Get processed video frame from output queue and match it with input frame""" + try: + # Get the frame from the incoming queue first + frame = await self.video_incoming_frames.get() + + while 
frame.side_data.skipped: + frame = await self.video_incoming_frames.get() + + # Get the processed frame from the output queue + logger.info("Getting video output") + out_tensor = await self.client.get_video_output() + + # If there are more frames in the output queue, drain them to get the most recent + # This helps with synchronization when processing is faster than display + while not tensor_cache.image_outputs.empty(): + try: + newer_tensor = await asyncio.wait_for(self.client.get_video_output(), 0.01) + out_tensor = newer_tensor # Use the most recent frame + logger.info("Using more recent frame from output queue") + except asyncio.TimeoutError: + break + + logger.info(f"Received output tensor with shape: {out_tensor.shape if hasattr(out_tensor, 'shape') else 'unknown'}") + + # Process the output tensor + processed_frame = self.video_postprocess(out_tensor) + processed_frame.pts = frame.pts + processed_frame.time_base = frame.time_base + + return processed_frame + + except Exception as e: + logger.error(f"Error in get_processed_video_frame: {str(e)}") + # Create a black frame as fallback + black_frame = av.VideoFrame(width=512, height=512, format='rgb24') + return black_frame + + async def get_processed_audio_frame(self): + # TODO: make it generic to support purely generative audio cases and also add frame skipping + frame = await self.audio_incoming_frames.get() + if frame.samples > len(self.processed_audio_buffer): + out_tensor = await self.client.get_audio_output() + self.processed_audio_buffer = np.concatenate([self.processed_audio_buffer, out_tensor]) + out_data = self.processed_audio_buffer[:frame.samples] + self.processed_audio_buffer = self.processed_audio_buffer[frame.samples:] + + processed_frame = self.audio_postprocess(out_data) + processed_frame.pts = frame.pts + processed_frame.time_base = frame.time_base + processed_frame.sample_rate = frame.sample_rate + + return processed_frame + + async def get_nodes_info(self) -> Dict[str, Any]: + """Get information about all nodes in the current prompt including metadata.""" + nodes_info = await self.client.get_available_nodes() + return nodes_info + + async def cleanup(self): + await self.client.cleanup() \ No newline at end of file diff --git a/src/comfystream/client_api.py b/src/comfystream/client_api.py new file mode 100644 index 00000000..5e23e391 --- /dev/null +++ b/src/comfystream/client_api.py @@ -0,0 +1,836 @@ +import asyncio +import json +import uuid +import websockets +import base64 +import aiohttp +import logging +import torch +import numpy as np +from io import BytesIO +from PIL import Image +from typing import List, Dict, Any, Optional, Union +import random +import time + +from comfystream import tensor_cache +from comfystream.utils_api import convert_prompt + +logger = logging.getLogger(__name__) + +class ComfyStreamClient: + def __init__(self, host: str = "127.0.0.1", port: int = 8198, **kwargs): + """ + Initialize the ComfyStream client to use the ComfyUI API. 
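+        Unlike the embedded client, this client talks to a running ComfyUI
+        instance over its HTTP (/api) and WebSocket (/ws) endpoints.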
+ + Args: + host: The hostname or IP address of the ComfyUI server + port: The port number of the ComfyUI server + **kwargs: Additional configuration parameters + """ + self.host = host + self.port = port + self.server_address = f"ws://{host}:{port}/ws" + self.api_base_url = f"http://{host}:{port}/api" + self.client_id = kwargs.get('client_id', str(uuid.uuid4())) + self.api_version = kwargs.get('api_version', "1.0.0") + self.ws = None + self.current_prompts = [] + self.running_prompts = {} + self.cleanup_lock = asyncio.Lock() + + # WebSocket connection + self._ws_listener_task = None + self.execution_complete_event = asyncio.Event() + self.execution_started = False + self._prompt_id = None + + # Configure logging + if 'log_level' in kwargs: + logger.setLevel(kwargs['log_level']) + + # Enable debug mode + self.debug = kwargs.get('debug', True) + + logger.info(f"ComfyStreamClient initialized with host: {host}, port: {port}, client_id: {self.client_id}") + + async def set_prompts(self, prompts: List[Dict]): + """Set prompts and run them (compatible with original interface)""" + # Convert prompts (this already randomizes seeds, but we'll enhance it) + self.current_prompts = [convert_prompt(prompt) for prompt in prompts] + + # Create tasks for each prompt + for idx in range(len(self.current_prompts)): + task = asyncio.create_task(self.run_prompt(idx)) + self.running_prompts[idx] = task + + logger.info(f"Set {len(self.current_prompts)} prompts for execution") + + async def update_prompts(self, prompts: List[Dict]): + """Update existing prompts (compatible with original interface)""" + if len(prompts) != len(self.current_prompts): + raise ValueError( + "Number of updated prompts must match the number of currently running prompts." + ) + self.current_prompts = [convert_prompt(prompt) for prompt in prompts] + logger.info(f"Updated {len(self.current_prompts)} prompts") + + async def run_prompt(self, prompt_index: int): + """Run a prompt continuously, processing new frames as they arrive""" + logger.info(f"Running prompt {prompt_index}") + + # Make sure WebSocket is connected + await self._connect_websocket() + + # Always set execution complete at start to allow first frame to be processed + self.execution_complete_event.set() + logger.info("Setting execution_complete_event to TRUE at start") + + try: + while True: + # Wait until we have tensor data available before sending prompt + if tensor_cache.image_inputs.empty(): + await asyncio.sleep(0.01) # Reduced sleep time for faster checking + continue + + # Clear event before sending a new prompt + if self.execution_complete_event.is_set(): + # Reset execution state for next frame + self.execution_complete_event.clear() + logger.info("Setting execution_complete_event to FALSE before executing prompt") + + # Queue the prompt with the current frame + await self._execute_prompt(prompt_index) + + # Wait for execution completion with timeout + try: + logger.info("Waiting for execution to complete (max 10 seconds)...") + await asyncio.wait_for(self.execution_complete_event.wait(), timeout=10.0) + logger.info("Execution complete, ready for next frame") + except asyncio.TimeoutError: + logger.error("Timeout waiting for execution, forcing continuation") + self.execution_complete_event.set() + else: + # If execution is not complete, check again shortly + await asyncio.sleep(0.01) # Short sleep to prevent CPU spinning + + except asyncio.CancelledError: + logger.info(f"Prompt {prompt_index} execution cancelled") + raise + except Exception as e: + 
logger.error(f"Error in run_prompt: {str(e)}") + raise + + async def _connect_websocket(self): + """Connect to the ComfyUI WebSocket endpoint""" + try: + if self.ws is not None and self.ws.open: + return self.ws + + # Close existing connection if any + if self.ws is not None: + try: + await self.ws.close() + except: + pass + self.ws = None + + logger.info(f"Connecting to WebSocket at {self.server_address}?clientId={self.client_id}") + + # Set a reasonable timeout for connection + websocket_timeout = 10.0 # seconds + + try: + # Connect with proper error handling + self.ws = await websockets.connect( + f"{self.server_address}?clientId={self.client_id}", + ping_interval=5, + ping_timeout=10, + close_timeout=5, + max_size=None, # No limit on message size + ssl=None + ) + + logger.info("WebSocket connected successfully") + + # Start the listener task if not already running + if self._ws_listener_task is None or self._ws_listener_task.done(): + self._ws_listener_task = asyncio.create_task(self._ws_listener()) + logger.info("Started WebSocket listener task") + + return self.ws + + except (websockets.exceptions.WebSocketException, ConnectionError, OSError) as e: + logger.error(f"WebSocket connection error: {e}") + self.ws = None + # Signal execution complete to prevent hanging if connection fails + self.execution_complete_event.set() + # Retry after a delay + await asyncio.sleep(1) + return await self._connect_websocket() + + except Exception as e: + logger.error(f"Unexpected error in _connect_websocket: {e}") + self.ws = None + # Signal execution complete to prevent hanging + self.execution_complete_event.set() + return None + + async def _ws_listener(self): + """Listen for WebSocket messages and process them""" + try: + logger.info(f"WebSocket listener started") + while True: + if self.ws is None: + try: + await self._connect_websocket() + except Exception as e: + logger.error(f"Error connecting to WebSocket: {e}") + await asyncio.sleep(1) + continue + + try: + # Receive and process messages + message = await self.ws.recv() + + if isinstance(message, str): + # Process JSON messages + await self._handle_text_message(message) + else: + # Handle binary data - likely image preview or tensor data + await self._handle_binary_message(message) + + except websockets.exceptions.ConnectionClosed: + logger.info("WebSocket connection closed") + self.ws = None + await asyncio.sleep(1) + except Exception as e: + logger.error(f"Error in WebSocket listener: {e}") + await asyncio.sleep(1) + + except asyncio.CancelledError: + logger.info("WebSocket listener cancelled") + raise + except Exception as e: + logger.error(f"Unexpected error in WebSocket listener: {e}") + + async def _handle_text_message(self, message: str): + """Process text (JSON) messages from the WebSocket""" + try: + data = json.loads(message) + message_type = data.get("type", "unknown") + + # logger.info(f"Received message type: {message_type}") + + # Handle different message types + if message_type == "status": + pass + ''' + # Status message with comfy_ui's queue information + queue_remaining = data.get("data", {}).get("queue_remaining", 0) + exec_info = data.get("data", {}).get("exec_info", {}) + if queue_remaining == 0 and not exec_info: + logger.info("Queue empty, no active execution") + else: + logger.info(f"Queue status: {queue_remaining} items remaining") + ''' + + elif message_type == "progress": + if "data" in data and "value" in data["data"]: + progress = data["data"]["value"] + max_value = data["data"].get("max", 100) + # Log the 
progress for debugging + # logger.info(f"Progress: {progress}/{max_value}") + + elif message_type == "execution_start": + self.execution_started = True + if "data" in data and "prompt_id" in data["data"]: + self._prompt_id = data["data"]["prompt_id"] + # logger.info(f"Execution started for prompt {self._prompt_id}") + + elif message_type == "executing": + self.execution_started = True + if "data" in data: + if "prompt_id" in data["data"]: + self._prompt_id = data["data"]["prompt_id"] + if "node" in data["data"]: + node_id = data["data"]["node"] + # ogger.info(f"Executing node: {node_id}") + + elif message_type in ["execution_cached", "execution_error", "execution_complete", "execution_interrupted"]: + # logger.info(f"{message_type} message received for prompt {self._prompt_id}") + #self.execution_started = False + + # Always signal completion for these terminal states + # self.execution_complete_event.set() + # logger.info(f"Set execution_complete_event from {message_type}") + pass + + elif message_type == "executed": + # This is sent when a node is completely done + if "data" in data and "node_id" in data["data"]: + node_id = data["data"]["node_id"] + logger.info(f"Node execution complete: {node_id}") + + # Check if this is our SaveTensorAPI node + if "SaveTensorAPI" in str(node_id): + logger.info("SaveTensorAPI node executed, checking for tensor data") + # The binary data should come separately via websocket + + # If we've been running for too long without tensor data, force completion + elif self.execution_started and not self.execution_complete_event.is_set(): + # Check if this was the last node + if data.get("data", {}).get("remaining", 0) == 0: + # logger.info("All nodes executed but no tensor data received, forcing completion") + # self.execution_complete_event.set() + pass + + elif message_type == "executed_node" and "output" in data.get("data", {}): + node_id = data.get("data", {}).get("node_id") + output_data = data.get("data", {}).get("output", {}) + prompt_id = data.get("data", {}).get("prompt_id", "unknown") + + logger.info(f"Node {node_id} executed in prompt {prompt_id}") + + ''' + # Check if this is from ETN_SendImageWebSocket node + if "ui" in output_data and "images" in output_data["ui"]: + images_info = output_data["ui"]["images"] + logger.info(f"Found image output from ETN_SendImageWebSocket in node {node_id}") + + # Images will be received via binary websocket messages after this event + # The binary handler will take care of them + pass + + # Keep existing handling for tensor data + elif "ui" in output_data and "tensor" in output_data["ui"]: + tensor_info = output_data["ui"]["tensor"] + tensor_id = tensor_info.get("tensor_id", "unknown") + logger.info(f"Found tensor data with ID: {tensor_id} in node {node_id}") + + # Decode the tensor data + tensor_data = await self._decode_tensor_data(tensor_info) + if tensor_data is not None: + # Add to output queue without waiting to unblock event loop + tensor_cache.image_outputs.put_nowait(tensor_data) + logger.info(f"Added tensor to output queue, shape: {tensor_data.shape}") + + # IMPORTANT: Immediately signal that we can proceed with the next frame + # when we receive tensor data, don't wait + logger.info("Received tensor data, immediately signaling execution complete") + self.execution_complete_event.set() + logger.info("Set execution_complete_event after processing tensor data") + else: + logger.error("Failed to decode tensor data") + # Signal completion even if decoding failed to prevent hanging + 
self.execution_complete_event.set() + ''' + except json.JSONDecodeError: + logger.error(f"Invalid JSON message: {message[:100]}...") + except Exception as e: + logger.error(f"Error handling WebSocket message: {e}") + # Signal completion on error to prevent hanging + self.execution_complete_event.set() + + async def _handle_binary_message(self, binary_data): + """Process binary messages from the WebSocket""" + try: + # Log binary message information + # logger.info(f"Received binary message of size: {len(binary_data)} bytes") + + # Signal execution is complete, queue next frame + self.execution_complete_event.set() + + # Binary messages in ComfyUI start with a header + # First 8 bytes are used for header information + if len(binary_data) <= 8: + logger.warning(f"Binary message too short: {len(binary_data)} bytes") + return + + # Extract header data based on the actual format observed in logs + # Header bytes (hex): 0000000100000001 - this appears to be the format in use + event_type = int.from_bytes(binary_data[:4], byteorder='little') + format_type = int.from_bytes(binary_data[4:8], byteorder='little') + data = binary_data[8:] + + # Log header details + logger.info(f"Binary message header: event_type={event_type}, format_type={format_type}, data_size={len(data)} bytes") + #logger.info(f"Header bytes (hex): {binary_data[:8].hex()}") + + # Check if this is an image (JPEG starts with 0xFF, 0xD8, PNG starts with 0x89, 0x50) + is_jpeg = data[:2] == b'\xff\xd8' + is_png = data[:4] == b'\x89\x50\x4e\x47' + + if is_jpeg or is_png: + image_format = "JPEG" if is_jpeg else "PNG" + logger.info(f"Detected {image_format} image based on magic bytes") + + # Create a NEW binary message with the expected header format for the JavaScript client + # The JavaScript expects: [0:4]=1 (PREVIEW_IMAGE), [4:8]=1 (JPEG format) or [4:8]=2 (PNG format) + # This matches exactly what the JS code is looking for: + # const event = dataView.getUint32(0); // event type (1 = PREVIEW_IMAGE) + # const format = dataView.getUint32(4); // format (1 = JPEG, 2 = PNG) + js_event_type = (1).to_bytes(4, byteorder='little') # PREVIEW_IMAGE = 1 + js_format_type = (1 if is_jpeg else 2).to_bytes(4, byteorder='little') + transformed_data = js_event_type + js_format_type + data + + # Forward to WebSocket client if connected + # if self.ws: + # await self.ws.send(transformed_data) + # logger.info(f"Sent transformed {image_format} image data to WebSocket with correct JS header format") + #else: + # logger.error("WebSocket not connected, cannot forward image to JS client") + + # Process the image for our pipeline + try: + # Decode the image + img = Image.open(BytesIO(data)) + logger.info(f"Successfully decoded image: size={img.size}, mode={img.mode}, format={img.format}") + + # Convert to RGB if not already + if img.mode != "RGB": + img = img.convert("RGB") + logger.info(f"Converted image to RGB mode") + + # Save image to temp folder as a file + # TESTING + ''' + import os + import tempfile + temp_folder = os.path.join(tempfile.gettempdir(), "comfyui_images") + os.makedirs(temp_folder, exist_ok=True) + img_path = os.path.join(temp_folder, f"comfyui_image_{time.time()}.png") + img.save(img_path) + logger.info(f"Saved image to {img_path}") + ''' + + # Convert to tensor (normalize to [0,1] range for consistency) + img_np = np.array(img).astype(np.float32) / 255.0 + tensor = torch.from_numpy(img_np) + + # CRITICAL: Ensure dimensions are correctly understood + # The tensor should be in HWC format initially from PIL/numpy + logger.info(f"Initial 
tensor shape from image: {tensor.shape}") + + # Convert from HWC to BCHW format for consistency with model expectations + if len(tensor.shape) == 3 and tensor.shape[2] == 3: # HWC format (H,W,3) + tensor = tensor.permute(2, 0, 1).unsqueeze(0) # -> BCHW (1,3,H,W) + logger.info(f"Converted to BCHW tensor with shape: {tensor.shape}") + + # Check for NaN or Inf values + if torch.isnan(tensor).any() or torch.isinf(tensor).any(): + logger.warning("Tensor contains NaN or Inf values! Replacing with zeros") + tensor = torch.nan_to_num(tensor, nan=0.0, posinf=1.0, neginf=0.0) + + # Log detailed tensor info for debugging + logger.info(f"Final tensor with shape: {tensor.shape}, " + f"min={tensor.min().item()}, max={tensor.max().item()}, " + f"mean={tensor.mean().item()}") + + # Add to output queue without waiting + tensor_cache.image_outputs.put_nowait(tensor) + logger.info(f"Added tensor to output queue, queue size: {tensor_cache.image_outputs.qsize()}") + return + + except Exception as img_error: + logger.error(f"Error processing image: {img_error}", exc_info=True) + + # If we get here, we couldn't process the image + logger.warning("Failed to process image, creating default tensor") + default_tensor = torch.zeros(1, 3, 512, 512) + tensor_cache.image_outputs.put_nowait(default_tensor) + self.execution_complete_event.set() + + except Exception as e: + logger.error(f"Error handling binary message: {e}", exc_info=True) + # Set execution complete event to avoid hanging + self.execution_complete_event.set() + + async def _execute_prompt(self, prompt_index: int): + """Execute a prompt via the ComfyUI API""" + try: + # Get the prompt to execute + prompt = self.current_prompts[prompt_index] + + # Ensure all seed values are randomized for every execution + # This forces ComfyUI to not use cached results + for node_id, node in prompt.items(): + if isinstance(node, dict) and "inputs" in node: + if "seed" in node["inputs"]: + # Generate a truly random seed each time + random_seed = random.randint(0, 18446744073709551615) + node["inputs"]["seed"] = random_seed + logger.info(f"Randomized seed to {random_seed} for node {node_id}") + + # Also randomize noise_seed if present + if "noise_seed" in node["inputs"]: + noise_seed = random.randint(0, 18446744073709551615) + node["inputs"]["noise_seed"] = noise_seed + logger.info(f"Randomized noise_seed to {noise_seed} for node {node_id}") + + # Add a timestamp parameter to each node to prevent caching + # This is a "hidden" trick to force ComfyUI to consider each execution unique + timestamp = int(time.time() * 1000) # millisecond timestamp + for node_id, node in prompt.items(): + if isinstance(node, dict) and "inputs" in node: + # Add a timestamp parameter to ETN_LoadImageBase64 nodes + if node.get("class_type") in ["ETN_LoadImageBase64", "LoadImageBase64"]: + # Add a unique cache-busting parameter + node["inputs"]["_timestamp"] = timestamp + logger.info(f"Added timestamp {timestamp} to node {node_id}") + + # Check if we have a frame waiting to be processed + if not tensor_cache.image_inputs.empty(): + logger.info("Found tensor in input queue, preparing for API") + # Get the frame from the cache - make sure to get the most recent frame + while not tensor_cache.image_inputs.empty(): + frame_or_tensor = tensor_cache.image_inputs.get_nowait() + + # Find ETN_LoadImageBase64 nodes + load_image_nodes = [] + for node_id, node in prompt.items(): + if isinstance(node, dict) and node.get("class_type") == "ETN_LoadImageBase64": + load_image_nodes.append(node_id) + + if not 
load_image_nodes: + # Also check for regular LoadImageBase64 nodes as fallback + for node_id, node in prompt.items(): + if isinstance(node, dict) and node.get("class_type") == "LoadImageBase64": + load_image_nodes.append(node_id) + + if not load_image_nodes: + logger.warning("No ETN_LoadImageBase64 or LoadImageBase64 nodes found in the prompt") + self.execution_complete_event.set() # Signal completion + return + else: + # Convert the frame/tensor to base64 and include directly in the prompt + try: + # Get the actual tensor data - handle different input types + tensor = None + + # Check if it's a PyAV VideoFrame with preprocessed tensor in side_data + if hasattr(frame_or_tensor, 'side_data') and hasattr(frame_or_tensor.side_data, 'input'): + tensor = frame_or_tensor.side_data.input + logger.info(f"Using preprocessed tensor from frame.side_data.input with shape {tensor.shape}") + # Check if it's a PyTorch tensor + elif isinstance(frame_or_tensor, torch.Tensor): + tensor = frame_or_tensor + logger.info(f"Using tensor directly with shape {tensor.shape}") + # Check if it's a numpy array + elif isinstance(frame_or_tensor, np.ndarray): + tensor = torch.from_numpy(frame_or_tensor).float() + logger.info(f"Converted numpy array to tensor with shape {tensor.shape}") + else: + # If it's a PyAV frame without preprocessed data, convert it + try: + if hasattr(frame_or_tensor, 'to_ndarray'): + frame_np = frame_or_tensor.to_ndarray(format="rgb24").astype(np.float32) / 255.0 + tensor = torch.from_numpy(frame_np).unsqueeze(0) + logger.info(f"Converted PyAV frame to tensor with shape {tensor.shape}") + else: + logger.error(f"Unsupported frame type: {type(frame_or_tensor)}") + self.execution_complete_event.set() + return + except Exception as e: + logger.error(f"Error converting frame to tensor: {e}") + self.execution_complete_event.set() + return + + if tensor is None: + logger.error("Failed to get valid tensor data from input") + self.execution_complete_event.set() + return + + # Now process the tensor (which should be a proper PyTorch tensor) + # Ensure it's a tensor on CPU and detached + tensor = tensor.detach().cpu().float() + + # Handle different formats + if len(tensor.shape) == 4: # BCHW format (batch) + tensor = tensor[0] # Take first image from batch + + # Ensure it's in CHW format + if len(tensor.shape) == 3 and tensor.shape[2] == 3: # HWC format + tensor = tensor.permute(2, 0, 1) # Convert to CHW + + # Convert to PIL image for saving + tensor_np = (tensor.permute(1, 2, 0) * 255).clamp(0, 255).numpy().astype(np.uint8) + img = Image.fromarray(tensor_np) + + # Save as PNG to BytesIO and convert to base64 string + buffer = BytesIO() + img.save(buffer, format="PNG") + buffer.seek(0) + + # Encode as base64 - for ETN_LoadImageBase64, we need the raw base64 string + img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8') + logger.info(f"Created base64 string of length: {len(img_base64)}") + + # Update all ETN_LoadImageBase64 nodes with the base64 data + for node_id in load_image_nodes: + prompt[node_id]["inputs"]["image"] = img_base64 + # Add a small random suffix to image data to prevent caching + rand_suffix = str(random.randint(1, 1000000)) + prompt[node_id]["inputs"]["_cache_buster"] = rand_suffix + logger.info(f"Updated node {node_id} with base64 string and cache buster {rand_suffix}") + + except Exception as e: + logger.error(f"Error converting tensor to base64: {e}") + # Signal execution complete in case of error + self.execution_complete_event.set() + return + else: + logger.info("No 
tensor in input queue, skipping prompt execution") + self.execution_complete_event.set() # Signal completion + return + + # Execute the prompt via API + async with aiohttp.ClientSession() as session: + api_url = f"{self.api_base_url}/prompt" + payload = { + "prompt": prompt, + "client_id": self.client_id + } + + # Send the request + logger.info(f"Sending prompt to {api_url}") + async with session.post(api_url, json=payload) as response: + if response.status == 200: + result = await response.json() + self._prompt_id = result.get("prompt_id") + logger.info(f"Prompt queued with ID: {self._prompt_id}") + self.execution_started = True + else: + error_text = await response.text() + logger.error(f"Error queueing prompt: {response.status} - {error_text}") + # Signal execution complete in case of error + self.execution_complete_event.set() + + except aiohttp.ClientError as e: + logger.error(f"Client error queueing prompt: {e}") + self.execution_complete_event.set() + except Exception as e: + logger.error(f"Error executing prompt: {e}") + # Signal execution complete in case of error + self.execution_complete_event.set() + + async def _send_tensor_via_websocket(self, tensor): + """Send tensor data via the websocket connection""" + try: + if self.ws is None: + logger.error("WebSocket not connected, cannot send tensor") + self.execution_complete_event.set() # Prevent hanging + return + + # Convert the tensor to image format for sending + if isinstance(tensor, np.ndarray): + tensor = torch.from_numpy(tensor).float() + + # Ensure on CPU and correct format + tensor = tensor.detach().cpu().float() + + # Prepare binary data + if len(tensor.shape) == 4: # BCHW format (batch of images) + if tensor.shape[0] > 1: + logger.info(f"Taking first image from batch of {tensor.shape[0]}") + tensor = tensor[0] # Take first image if batch + + # Ensure CHW format (3 channels) + if len(tensor.shape) == 3: + if tensor.shape[0] != 3 and tensor.shape[2] == 3: # HWC format + tensor = tensor.permute(2, 0, 1) # Convert to CHW + elif tensor.shape[0] != 3: + logger.warning(f"Tensor doesn't have 3 channels: {tensor.shape}. Creating standard tensor.") + # Create a standard RGB tensor + tensor = torch.zeros(3, 512, 512) + else: + logger.warning(f"Tensor has unexpected shape: {tensor.shape}. Creating standard tensor.") + # Create a standard RGB tensor + tensor = torch.zeros(3, 512, 512) + + # Check tensor dimensions and log detailed info + logger.info(f"Original tensor for WS: shape={tensor.shape}, min={tensor.min().item():.4f}, max={tensor.max().item():.4f}") + + # CRITICAL FIX: The issue is with the shape - no need to resize if dimensions are fine + if tensor.shape[1] < 64 or tensor.shape[2] < 64: + logger.warning(f"Tensor dimensions too small: {tensor.shape}. Resizing to 512x512") + import torch.nn.functional as F + tensor = tensor.unsqueeze(0) # Add batch dimension for interpolate + tensor = F.interpolate(tensor, size=(512, 512), mode='bilinear', align_corners=False) + tensor = tensor.squeeze(0) # Remove batch dimension after resize + logger.info(f"Resized tensor to: {tensor.shape}") + + # Check for NaN or Inf values + if torch.isnan(tensor).any() or torch.isinf(tensor).any(): + logger.warning("Tensor contains NaN or Inf values! 
Replacing with zeros.") + tensor = torch.nan_to_num(tensor, nan=0.0, posinf=1.0, neginf=0.0) + + # Convert to image (HWC for PIL) + tensor_np = (tensor.permute(1, 2, 0) * 255).clamp(0, 255).numpy().astype(np.uint8) + img = Image.fromarray(tensor_np) + + logger.info(f"Converted to PIL image with dimensions: {img.size}") + + # Convert to PNG + buffer = BytesIO() + img.save(buffer, format="PNG") + buffer.seek(0) + img_bytes = buffer.getvalue() + + # CRITICAL FIX: We need to send the binary data with a proper node ID prefix + # LoadTensorAPI node expects this header format to identify the target node + # The first 4 bytes are the message type (3 for binary tensor) and the next 4 are the node ID + # Since we don't know the exact node ID, we'll use a generic one that will be interpreted as + # "send this to the currently waiting LoadTensorAPI node" + + # Build header (8 bytes total) + header = bytearray() + # Message type 3 (custom binary tensor data) + header.extend((3).to_bytes(4, byteorder='little')) + # Generic node ID (0 means "send to whatever node is waiting") + header.extend((0).to_bytes(4, byteorder='little')) + + # Combine header and image data + full_data = header + img_bytes + + # Send binary data via websocket + await self.ws.send(full_data) + logger.info(f"Sent tensor as PNG image via websocket with proper header, size: {len(full_data)} bytes, image dimensions: {img.size}") + + except Exception as e: + logger.error(f"Error sending tensor via websocket: {e}") + + # Signal execution complete in case of error + self.execution_complete_event.set() + + async def cleanup(self): + """Clean up resources""" + async with self.cleanup_lock: + # Cancel all running tasks + for task in self.running_prompts.values(): + if not task.done(): + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + self.running_prompts.clear() + + # Close WebSocket connection + if self.ws: + try: + await self.ws.close() + except Exception as e: + logger.error(f"Error closing WebSocket: {e}") + self.ws = None + + # Cancel WebSocket listener task + if self._ws_listener_task and not self._ws_listener_task.done(): + self._ws_listener_task.cancel() + try: + await self._ws_listener_task + except asyncio.CancelledError: + pass + self._ws_listener_task = None + + await self.cleanup_queues() + logger.info("Client cleanup complete") + + async def cleanup_queues(self): + """Clean up tensor queues""" + while not tensor_cache.image_inputs.empty(): + tensor_cache.image_inputs.get() + + while not tensor_cache.audio_inputs.empty(): + tensor_cache.audio_inputs.get() + + while tensor_cache.image_outputs.qsize() > 0: + try: + await tensor_cache.image_outputs.get() + except: + pass + + while tensor_cache.audio_outputs.qsize() > 0: + try: + await tensor_cache.audio_outputs.get() + except: + pass + + logger.info("Tensor queues cleared") + + def put_video_input(self, tensor: Union[torch.Tensor, np.ndarray]): + """ + Put a video TENSOR into the tensor cache for processing. 
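+        If the input queue is already full, the oldest queued frame is dropped
+        first so the put does not block.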
+ + Args: + tensor: Video frame as a tensor (or numpy array) + """ + try: + # Only remove one frame if the queue is full (like in client.py) + if tensor_cache.image_inputs.full(): + tensor_cache.image_inputs.get_nowait() + + # Ensure tensor is detached if it's a torch tensor + if isinstance(tensor, torch.Tensor): + tensor = tensor.detach() + + tensor_cache.image_inputs.put(tensor) + + except Exception as e: + logger.error(f"Error in put_video_input: {e}") + + def put_audio_input(self, frame): + """Put audio frame into tensor cache""" + tensor_cache.audio_inputs.put(frame) + + async def get_video_output(self): + """Get processed video frame from tensor cache""" + logger.info("Waiting for processed tensor from output queue") + result = await tensor_cache.image_outputs.get() + logger.info(f"Got processed tensor from output queue: shape={result.shape if hasattr(result, 'shape') else 'unknown'}") + return result + + async def get_audio_output(self): + """Get processed audio frame from tensor cache""" + return await tensor_cache.audio_outputs.get() + + async def get_available_nodes(self): + """Get metadata and available nodes info for current prompts""" + try: + async with aiohttp.ClientSession() as session: + url = f"{self.api_base_url}/object_info" + async with session.get(url) as response: + if response.status == 200: + data = await response.json() + + # Format node info similar to the embedded client response + all_prompts_nodes_info = {} + + for prompt_index, prompt in enumerate(self.current_prompts): + nodes_info = {} + + for node_id, node in prompt.items(): + class_type = node.get('class_type') + if class_type: + nodes_info[node_id] = { + 'class_type': class_type, + 'inputs': {} + } + + if 'inputs' in node: + for input_name, input_value in node['inputs'].items(): + nodes_info[node_id]['inputs'][input_name] = { + 'value': input_value, + 'type': 'unknown' # We don't have type information + } + + all_prompts_nodes_info[prompt_index] = nodes_info + + return all_prompts_nodes_info + + else: + logger.error(f"Error getting node info: {response.status}") + return {} + except Exception as e: + logger.error(f"Error getting node info: {str(e)}") + return {} \ No newline at end of file diff --git a/src/comfystream/utils_api.py b/src/comfystream/utils_api.py new file mode 100644 index 00000000..8b93ec7f --- /dev/null +++ b/src/comfystream/utils_api.py @@ -0,0 +1,245 @@ +import copy +import random + +from typing import Dict, Any +# from comfy.api.components.schema.prompt import Prompt, PromptDictInput + +import logging + +def create_load_tensor_node(): + return { + "inputs": { + "tensor_data": "" # Empty tensor data that will be filled at runtime + }, + "class_type": "LoadTensorAPI", + "_meta": {"title": "Load Tensor (API)"}, + } + + +def create_save_tensor_node(inputs: Dict[Any, Any]): + """Create a SaveTensorAPI node with proper input formatting""" + # Make sure images input is properly formatted [node_id, output_index] + images_input = inputs.get("images") + + # If images input is not properly formatted as [node_id, output_index] + if not isinstance(images_input, list) or len(images_input) != 2: + print(f"Warning: Invalid images input format: {images_input}, using default") + images_input = ["", 0] # Default empty value + + return { + "inputs": { + "images": images_input, # Should be [node_id, output_index] + "format": "png", # Better default than JPG for quality + "quality": 95 + }, + "class_type": "SaveTensorAPI", + "_meta": {"title": "Save Tensor (API)"}, + } + +def convert_prompt(prompt): + + 
logging.info("Converting prompt: %s", prompt) + + # Set random seeds for any seed nodes + for key, node in prompt.items(): + if not isinstance(node, dict) or "inputs" not in node: + continue + + # Check if this node has a seed input directly + if "seed" in node.get("inputs", {}): + # Generate a random seed (same range as JavaScript's Math.random() * 18446744073709552000) + random_seed = random.randint(0, 18446744073709551615) + node["inputs"]["seed"] = random_seed + print(f"Set random seed {random_seed} for node {key}") + + return prompt + +''' +def convert_prompt(prompt): + + # Check if this is a ComfyUI web UI format prompt with 'nodes' and 'links' + if isinstance(prompt, dict) and 'nodes' in prompt and 'links' in prompt: + # Convert the web UI prompt format to the API format + api_prompt = {} + + # Process each node + for node in prompt['nodes']: + node_id = str(node['id']) + + # Create a node entry in the API format + api_prompt[node_id] = { + 'class_type': node.get('type', 'Unknown'), + 'inputs': {}, + '_meta': { + 'title': node.get('type', 'Unknown') + } + } + + # Process inputs + if 'inputs' in node: + for input_data in node['inputs']: + input_name = input_data.get('name') + link_id = input_data.get('link') + + if input_name and link_id is not None: + # Find the source of this link + for link in prompt['links']: + if link[0] == link_id: # link ID matches + # Get source node and output slot + source_node_id = str(link[1]) + source_slot = link[3] + + # Add to inputs + api_prompt[node_id]['inputs'][input_name] = [ + source_node_id, + source_slot + ] + break + # If no link found, set to empty value + if input_name not in api_prompt[node_id]['inputs']: + api_prompt[node_id]['inputs'][input_name] = None + + # Process widget values + if 'widgets_values' in node: + for i, widget_value in enumerate(node.get('widgets_values', [])): + # Try to determine widget name from properties or use index + widget_name = f"widget_{i}" + # Add to inputs + api_prompt[node_id]['inputs'][widget_name] = widget_value + + # Use this converted prompt instead + prompt = api_prompt + + # Now process as normal API format prompt + prompt = copy.deepcopy(prompt) + + # Set random seeds for any seed nodes + for key, node in prompt.items(): + if not isinstance(node, dict) or "inputs" not in node: + continue + + # Check if this node has a seed input directly + if "seed" in node.get("inputs", {}): + # Generate a random seed (same range as JavaScript's Math.random() * 18446744073709552000) + random_seed = random.randint(0, 18446744073709551615) + node["inputs"]["seed"] = random_seed + print(f"Set random seed {random_seed} for node {key}") + + num_primary_inputs = 0 + num_inputs = 0 + num_outputs = 0 + + keys = { + "PrimaryInputLoadImage": [], + "LoadImage": [], + "PreviewImage": [], + "SaveImage": [], + "LoadTensor": [], + "SaveTensor": [], + "LoadImageBase64": [], + "LoadTensorAPI": [], + "SaveTensorAPI": [], + } + + for key, node in prompt.items(): + if not isinstance(node, dict): + continue + + class_type = node.get("class_type", "") + + # Track primary input and output nodes + if class_type in ["PrimaryInput", "PrimaryInputImage"]: + num_primary_inputs += 1 + keys["PrimaryInputLoadImage"].append(key) + elif class_type in ["LoadImage", "LoadTensor", "LoadAudioTensor", "LoadImageBase64", "LoadTensorAPI"]: + num_inputs += 1 + if class_type == "LoadImage": + keys["LoadImage"].append(key) + elif class_type == "LoadTensor": + keys["LoadTensor"].append(key) + elif class_type == "LoadImageBase64": + 
keys["LoadImageBase64"].append(key) + elif class_type == "LoadTensorAPI": + keys["LoadTensorAPI"].append(key) + elif class_type in ["PreviewImage", "SaveImage", "SaveTensor", "SaveAudioTensor", "SendImageWebSocket", "SaveTensorAPI"]: + num_outputs += 1 + if class_type == "PreviewImage": + keys["PreviewImage"].append(key) + elif class_type == "SaveImage": + keys["SaveImage"].append(key) + elif class_type == "SaveTensor": + keys["SaveTensor"].append(key) + elif class_type == "SaveTensorAPI": + keys["SaveTensorAPI"].append(key) + + print(f"Found {num_primary_inputs} primary inputs, {num_inputs} inputs, {num_outputs} outputs") + + # Set up connection for video feeds by replacing LoadImage with LoadImageBase64 + if num_inputs == 0 and num_primary_inputs == 0: + # Add a LoadTensorAPI node + new_key = "999990" + prompt[new_key] = create_load_tensor_node() + keys["LoadTensorAPI"].append(new_key) + print("Added LoadTensorAPI node for tensor input") + elif len(keys["LoadTensor"]) > 0 and len(keys["LoadTensorAPI"]) == 0: + # Replace LoadTensor with LoadTensorAPI if found + for key in keys["LoadTensor"]: + prompt[key] = create_load_tensor_node() + keys["LoadTensorAPI"].append(key) + print("Replaced LoadTensor with LoadTensorAPI") + + # Set up connection for output if needed + if num_outputs == 0: + # Find nodes that produce images + image_output_nodes = [] + for key, node in prompt.items(): + if isinstance(node, dict): + class_type = node.get("class_type", "") + # Look for nodes that typically output images + if any(output_type in class_type.lower() for output_type in ["vae", "decode", "img", "image", "upscale", "sample"]): + image_output_nodes.append(key) + # Also check if the node's RETURN_TYPES includes IMAGE + elif "outputs" in node and isinstance(node["outputs"], dict): + for output_name, output_type in node["outputs"].items(): + if "IMAGE" in output_type: + image_output_nodes.append(key) + break + + # If we found image output nodes, connect SaveTensorAPI to them + if image_output_nodes: + for i, node_key in enumerate(image_output_nodes): + new_key = f"999991_{i}" + prompt[new_key] = create_save_tensor_node({"images": [node_key, 0]}) + print(f"Added SaveTensorAPI node connected to {node_key}") + else: + # Try to find the last node in the chain as fallback + last_node = None + max_id = -1 + for key, node in prompt.items(): + if isinstance(node, dict): + try: + node_id = int(key) + if node_id > max_id: + max_id = node_id + last_node = key + except ValueError: + pass + + if last_node: + # Add a SaveTensorAPI node + new_key = "999991" + prompt[new_key] = create_save_tensor_node({"images": [last_node, 0]}) + print(f"Added SaveTensorAPI node connected to {last_node}") + else: + print("Warning: Could not find a suitable node to connect SaveTensorAPI to") + + # Make sure all SaveTensorAPI nodes have proper configuration + for key, node in prompt.items(): + if isinstance(node, dict) and node.get("class_type") == "SaveTensorAPI": + # Ensure format is set to PNG for optimal compatibility + if "inputs" in node: + node["inputs"]["format"] = "png" + + # Return the modified prompt + return prompt +''' \ No newline at end of file From b3a95ad30941320ee4ca16d62d472b80eeaf8b5b Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 25 Mar 2025 14:43:28 -0400 Subject: [PATCH 02/14] Cleanup of pre/post processing of frames. 
--- server/pipeline_api.py | 74 +++++++++++++----------------------------- 1 file changed, 22 insertions(+), 52 deletions(-) diff --git a/server/pipeline_api.py b/server/pipeline_api.py index 0f04e773..bfaf4909 100644 --- a/server/pipeline_api.py +++ b/server/pipeline_api.py @@ -3,9 +3,6 @@ import numpy as np import asyncio import logging -import time -from PIL import Image -from io import BytesIO from typing import Any, Dict, Union, List from comfystream.client_api import ComfyStreamClient @@ -76,62 +73,35 @@ async def put_audio_frame(self, frame: av.AudioFrame): self.client.put_audio_input(frame) await self.audio_incoming_frames.put(frame) - def video_preprocess(self, frame: av.VideoFrame) -> Union[torch.Tensor, np.ndarray]: - """Convert input video frame to tensor in consistent BCHW format""" - try: - frame_np = frame.to_ndarray(format="rgb24") - frame_np = frame_np.astype(np.float32) / 255.0 - tensor = torch.from_numpy(frame_np) - - # TODO: Necessary? - if len(tensor.shape) == 3 and tensor.shape[2] == 3: # HWC format - tensor = tensor.permute(2, 0, 1).unsqueeze(0) # -> BCHW - - # Ensure values are in range [0,1] - if tensor.min() < 0 or tensor.max() > 1: - logger.warning(f"Clamping preprocessing tensor: min={tensor.min().item()}, max={tensor.max().item()}") - tensor = torch.clamp(tensor, 0, 1) - - return tensor - - except Exception as e: - logger.error(f"Error in video_preprocess: {e}") - # Return a default tensor in case of error - return torch.zeros(1, 3, frame.height, frame.width) - def audio_preprocess(self, frame: av.AudioFrame) -> Union[torch.Tensor, np.ndarray]: return frame.to_ndarray().ravel().reshape(-1, 2).mean(axis=1).astype(np.int16) + # Works with ComfyUI Native + def video_preprocess(self, frame: av.VideoFrame) -> Union[torch.Tensor, np.ndarray]: + frame_np = frame.to_ndarray(format="rgb24").astype(np.float32) / 255.0 + return torch.from_numpy(frame_np).unsqueeze(0) + + ''' Converts HWC format (height, width, channels) to VideoFrame. def video_postprocess(self, output: Union[torch.Tensor, np.ndarray]) -> av.VideoFrame: - """Convert tensor to VideoFrame format""" - try: - # Ensure output is a tensor - if isinstance(output, np.ndarray): - output = torch.from_numpy(output) - - # Convert from BCHW to HWC format for video frame - if len(output.shape) == 4: # BCHW format - output = output.squeeze(0) # Remove batch dimension - if output.shape[0] == 3: # CHW format - output = output.permute(1, 2, 0) # Convert to HWC - - # Convert to numpy array in correct format for VideoFrame - frame_np = (output * 255.0).clamp(0, 255).to(dtype=torch.uint8).cpu().numpy() - - # Create VideoFrame with RGB format - video_frame = av.VideoFrame.from_ndarray(frame_np, format='rgb24') - - logger.info(f"Created video frame with shape: {frame_np.shape}") - return video_frame - - except Exception as e: - logger.error(f"Error in video_postprocess: {str(e)}") - # Return a black frame as fallback - return av.VideoFrame(width=512, height=512, format='rgb24') + return av.VideoFrame.from_ndarray( + (output * 255.0).clamp(0, 255).to(dtype=torch.uint8).squeeze(0).cpu().numpy() + ) + ''' + + '''Converts BCHW tensor [1,3,H,W] to VideoFrame. 
Assumes values are in range [0,1].''' + def video_postprocess(self, output: Union[torch.Tensor, np.ndarray]) -> av.VideoFrame: + return av.VideoFrame.from_ndarray( + (output.squeeze(0).permute(1, 2, 0) * 255.0) + .clamp(0, 255) + .to(dtype=torch.uint8) + .cpu() + .numpy(), + format='rgb24' + ) def audio_postprocess(self, output: Union[torch.Tensor, np.ndarray]) -> av.AudioFrame: return av.AudioFrame.from_ndarray(np.repeat(output, 2).reshape(1, -1)) - + async def get_processed_video_frame(self): """Get processed video frame from output queue and match it with input frame""" try: From 50ca4a1fc85ddf1db9e456114a5292fb3978e02a Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 25 Mar 2025 15:24:36 -0400 Subject: [PATCH 03/14] Added built-in nodes for base64 string and websocket image send, reduced uncessary base64 input frame operations, prep for multi-instance, cleanup. --- nodes/native_utils/__init__.py | 17 ++ nodes/native_utils/load_image_base64.py | 37 +++ nodes/native_utils/send_image_websocket.py | 44 +++ server/pipeline_api.py | 45 +-- src/comfystream/client_api.py | 313 ++++++--------------- src/comfystream/utils_api.py | 225 ++++----------- 6 files changed, 270 insertions(+), 411 deletions(-) create mode 100644 nodes/native_utils/__init__.py create mode 100644 nodes/native_utils/load_image_base64.py create mode 100644 nodes/native_utils/send_image_websocket.py diff --git a/nodes/native_utils/__init__.py b/nodes/native_utils/__init__.py new file mode 100644 index 00000000..e7e7789c --- /dev/null +++ b/nodes/native_utils/__init__.py @@ -0,0 +1,17 @@ +from .load_image_base64 import LoadImageBase64 +from .send_image_websocket import SendImageWebsocket + +# This dictionary is used by ComfyUI to register the nodes +NODE_CLASS_MAPPINGS = { + "LoadImageBase64": LoadImageBase64, + "SendImageWebsocket": SendImageWebsocket +} + +# This dictionary provides display names for the nodes in the UI +NODE_DISPLAY_NAME_MAPPINGS = { + "LoadImageBase64": "Load Image Base64 (ComfyStream)", + "SendImageWebsocket": "Send Image Websocket (ComfyStream)" +} + +# Export these variables for ComfyUI to use +__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"] diff --git a/nodes/native_utils/load_image_base64.py b/nodes/native_utils/load_image_base64.py new file mode 100644 index 00000000..f46a90df --- /dev/null +++ b/nodes/native_utils/load_image_base64.py @@ -0,0 +1,37 @@ +# borrowed from Acly's comfyui-tooling-nodes +# https://github.com/Acly/comfyui-tooling-nodes/blob/main/nodes.py + +# TODO: I think we can recieve tensor data directly from the pipeline through the /prompt endpoint as JSON +# This may be more efficient than sending base64 encoded images through the websocket and +# allow for alternative data formats. 
+ +from PIL import Image +import base64 +import numpy as np +import torch +from io import BytesIO + +class LoadImageBase64: + @classmethod + def INPUT_TYPES(s): + return {"required": {"image": ("STRING", {"multiline": False})}} + + RETURN_TYPES = ("IMAGE", "MASK") + CATEGORY = "external_tooling" + FUNCTION = "load_image" + + def load_image(self, image): + imgdata = base64.b64decode(image) + img = Image.open(BytesIO(imgdata)) + + if "A" in img.getbands(): + mask = np.array(img.getchannel("A")).astype(np.float32) / 255.0 + mask = torch.from_numpy(mask) + else: + mask = None + + img = img.convert("RGB") + img = np.array(img).astype(np.float32) / 255.0 + img = torch.from_numpy(img)[None,] + + return (img, mask) \ No newline at end of file diff --git a/nodes/native_utils/send_image_websocket.py b/nodes/native_utils/send_image_websocket.py new file mode 100644 index 00000000..590d3b7e --- /dev/null +++ b/nodes/native_utils/send_image_websocket.py @@ -0,0 +1,44 @@ +# borrowed from Acly's comfyui-tooling-nodes +# https://github.com/Acly/comfyui-tooling-nodes/blob/main/nodes.py + +# TODO: I think we can send tensor data directly to the pipeline in the websocket response. +# Worth talking to ComfyAnonymous about this. + +import numpy as np +from PIL import Image +from server import PromptServer, BinaryEventTypes + +class SendImageWebsocket: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "images": ("IMAGE",), + "format": (["PNG", "JPEG"], {"default": "PNG"}), + } + } + + RETURN_TYPES = () + FUNCTION = "send_images" + OUTPUT_NODE = True + CATEGORY = "external_tooling" + + def send_images(self, images, format): + results = [] + for tensor in images: + array = 255.0 * tensor.cpu().numpy() + image = Image.fromarray(np.clip(array, 0, 255).astype(np.uint8)) + + server = PromptServer.instance + server.send_sync( + BinaryEventTypes.UNENCODED_PREVIEW_IMAGE, + [format, image, None], + server.client_id, + ) + results.append({ + "source": "websocket", + "content-type": f"image/{format.lower()}", + "type": "output", + }) + + return {"ui": {"images": results}} \ No newline at end of file diff --git a/server/pipeline_api.py b/server/pipeline_api.py index bfaf4909..a6eb1205 100644 --- a/server/pipeline_api.py +++ b/server/pipeline_api.py @@ -3,10 +3,10 @@ import numpy as np import asyncio import logging +import time from typing import Any, Dict, Union, List from comfystream.client_api import ComfyStreamClient -from comfystream import tensor_cache WARMUP_RUNS = 5 logger = logging.getLogger(__name__) @@ -19,6 +19,10 @@ def __init__(self, **kwargs): self.audio_incoming_frames = asyncio.Queue() self.processed_audio_buffer = np.array([], dtype=np.int16) + self.last_frame_time = 0 + + # TODO: Not sure if this is needed - should match to UI selected FPS + self.min_frame_interval = 1/30 # Limit to 30 FPS async def warm_video(self): """Warm up the video pipeline with dummy frames""" @@ -62,6 +66,11 @@ async def update_prompts(self, prompts: Union[Dict[Any, Any], List[Dict[Any, Any await self.client.update_prompts([prompts]) async def put_video_frame(self, frame: av.VideoFrame): + current_time = time.time() + if current_time - self.last_frame_time < self.min_frame_interval: + return # Skip frame if too soon + + self.last_frame_time = current_time frame.side_data.input = self.video_preprocess(frame) frame.side_data.skipped = False # Different from LoadTensor, we don't skip frames here self.client.put_video_input(frame) @@ -78,8 +87,16 @@ def audio_preprocess(self, frame: av.AudioFrame) -> Union[torch.Tensor, 
np.ndarr # Works with ComfyUI Native def video_preprocess(self, frame: av.VideoFrame) -> Union[torch.Tensor, np.ndarray]: - frame_np = frame.to_ndarray(format="rgb24").astype(np.float32) / 255.0 - return torch.from_numpy(frame_np).unsqueeze(0) + # Convert directly to tensor, avoiding intermediate numpy array when possible + if hasattr(frame, 'to_tensor'): + tensor = frame.to_tensor() + else: + # If direct tensor conversion not available, use numpy + frame_np = frame.to_ndarray(format="rgb24") + tensor = torch.from_numpy(frame_np) + + # Normalize to [0,1] range and add batch dimension + return tensor.float().div(255.0).unsqueeze(0) ''' Converts HWC format (height, width, channels) to VideoFrame. def video_postprocess(self, output: Union[torch.Tensor, np.ndarray]) -> av.VideoFrame: @@ -103,31 +120,21 @@ def audio_postprocess(self, output: Union[torch.Tensor, np.ndarray]) -> av.Audio return av.AudioFrame.from_ndarray(np.repeat(output, 2).reshape(1, -1)) async def get_processed_video_frame(self): - """Get processed video frame from output queue and match it with input frame""" try: # Get the frame from the incoming queue first frame = await self.video_incoming_frames.get() - while frame.side_data.skipped: + # Skip frames if we're falling behind + while not self.video_incoming_frames.empty(): + # Get newer frame and mark old one as skipped + frame.side_data.skipped = True frame = await self.video_incoming_frames.get() + logger.info("Skipped older frame to catch up") # Get the processed frame from the output queue - logger.info("Getting video output") out_tensor = await self.client.get_video_output() - # If there are more frames in the output queue, drain them to get the most recent - # This helps with synchronization when processing is faster than display - while not tensor_cache.image_outputs.empty(): - try: - newer_tensor = await asyncio.wait_for(self.client.get_video_output(), 0.01) - out_tensor = newer_tensor # Use the most recent frame - logger.info("Using more recent frame from output queue") - except asyncio.TimeoutError: - break - - logger.info(f"Received output tensor with shape: {out_tensor.shape if hasattr(out_tensor, 'shape') else 'unknown'}") - - # Process the output tensor + # Process only the most recent frame processed_frame = self.video_postprocess(out_tensor) processed_frame.pts = frame.pts processed_frame.time_base = frame.time_base diff --git a/src/comfystream/client_api.py b/src/comfystream/client_api.py index 5e23e391..624de56e 100644 --- a/src/comfystream/client_api.py +++ b/src/comfystream/client_api.py @@ -255,11 +255,11 @@ async def _handle_text_message(self, message: str): self._prompt_id = data["data"]["prompt_id"] if "node" in data["data"]: node_id = data["data"]["node"] - # ogger.info(f"Executing node: {node_id}") + # logger.info(f"Executing node: {node_id}") elif message_type in ["execution_cached", "execution_error", "execution_complete", "execution_interrupted"]: # logger.info(f"{message_type} message received for prompt {self._prompt_id}") - #self.execution_started = False + # self.execution_started = False # Always signal completion for these terminal states # self.execution_complete_event.set() @@ -335,214 +335,90 @@ async def _handle_text_message(self, message: str): async def _handle_binary_message(self, binary_data): """Process binary messages from the WebSocket""" try: - # Log binary message information - # logger.info(f"Received binary message of size: {len(binary_data)} bytes") - - # Signal execution is complete, queue next frame - 
self.execution_complete_event.set() - - # Binary messages in ComfyUI start with a header - # First 8 bytes are used for header information + # Early return if message is too short if len(binary_data) <= 8: - logger.warning(f"Binary message too short: {len(binary_data)} bytes") + self.execution_complete_event.set() return - # Extract header data based on the actual format observed in logs - # Header bytes (hex): 0000000100000001 - this appears to be the format in use + # Extract header data only when needed event_type = int.from_bytes(binary_data[:4], byteorder='little') format_type = int.from_bytes(binary_data[4:8], byteorder='little') data = binary_data[8:] - # Log header details - logger.info(f"Binary message header: event_type={event_type}, format_type={format_type}, data_size={len(data)} bytes") - #logger.info(f"Header bytes (hex): {binary_data[:8].hex()}") - - # Check if this is an image (JPEG starts with 0xFF, 0xD8, PNG starts with 0x89, 0x50) - is_jpeg = data[:2] == b'\xff\xd8' - is_png = data[:4] == b'\x89\x50\x4e\x47' + # Quick check for image format + is_image = data[:2] in [b'\xff\xd8', b'\x89\x50'] + if not is_image: + self.execution_complete_event.set() + return - if is_jpeg or is_png: - image_format = "JPEG" if is_jpeg else "PNG" - logger.info(f"Detected {image_format} image based on magic bytes") - - # Create a NEW binary message with the expected header format for the JavaScript client - # The JavaScript expects: [0:4]=1 (PREVIEW_IMAGE), [4:8]=1 (JPEG format) or [4:8]=2 (PNG format) - # This matches exactly what the JS code is looking for: - # const event = dataView.getUint32(0); // event type (1 = PREVIEW_IMAGE) - # const format = dataView.getUint32(4); // format (1 = JPEG, 2 = PNG) - js_event_type = (1).to_bytes(4, byteorder='little') # PREVIEW_IMAGE = 1 - js_format_type = (1 if is_jpeg else 2).to_bytes(4, byteorder='little') - transformed_data = js_event_type + js_format_type + data + # Process image data directly + try: + img = Image.open(BytesIO(data)) + if img.mode != "RGB": + img = img.convert("RGB") + + with torch.no_grad(): + tensor = torch.from_numpy(np.array(img)).float().permute(2, 0, 1).unsqueeze(0) / 255.0 - # Forward to WebSocket client if connected - # if self.ws: - # await self.ws.send(transformed_data) - # logger.info(f"Sent transformed {image_format} image data to WebSocket with correct JS header format") - #else: - # logger.error("WebSocket not connected, cannot forward image to JS client") + # Add to output queue without waiting + tensor_cache.image_outputs.put_nowait(tensor) + self.execution_complete_event.set() - # Process the image for our pipeline - try: - # Decode the image - img = Image.open(BytesIO(data)) - logger.info(f"Successfully decoded image: size={img.size}, mode={img.mode}, format={img.format}") - - # Convert to RGB if not already - if img.mode != "RGB": - img = img.convert("RGB") - logger.info(f"Converted image to RGB mode") - - # Save image to temp folder as a file - # TESTING - ''' - import os - import tempfile - temp_folder = os.path.join(tempfile.gettempdir(), "comfyui_images") - os.makedirs(temp_folder, exist_ok=True) - img_path = os.path.join(temp_folder, f"comfyui_image_{time.time()}.png") - img.save(img_path) - logger.info(f"Saved image to {img_path}") - ''' - - # Convert to tensor (normalize to [0,1] range for consistency) - img_np = np.array(img).astype(np.float32) / 255.0 - tensor = torch.from_numpy(img_np) - - # CRITICAL: Ensure dimensions are correctly understood - # The tensor should be in HWC format initially from 
PIL/numpy - logger.info(f"Initial tensor shape from image: {tensor.shape}") - - # Convert from HWC to BCHW format for consistency with model expectations - if len(tensor.shape) == 3 and tensor.shape[2] == 3: # HWC format (H,W,3) - tensor = tensor.permute(2, 0, 1).unsqueeze(0) # -> BCHW (1,3,H,W) - logger.info(f"Converted to BCHW tensor with shape: {tensor.shape}") - - # Check for NaN or Inf values - if torch.isnan(tensor).any() or torch.isinf(tensor).any(): - logger.warning("Tensor contains NaN or Inf values! Replacing with zeros") - tensor = torch.nan_to_num(tensor, nan=0.0, posinf=1.0, neginf=0.0) - - # Log detailed tensor info for debugging - logger.info(f"Final tensor with shape: {tensor.shape}, " - f"min={tensor.min().item()}, max={tensor.max().item()}, " - f"mean={tensor.mean().item()}") - - # Add to output queue without waiting - tensor_cache.image_outputs.put_nowait(tensor) - logger.info(f"Added tensor to output queue, queue size: {tensor_cache.image_outputs.qsize()}") - return + except Exception as img_error: + logger.error(f"Error processing image: {img_error}") + self.execution_complete_event.set() - except Exception as img_error: - logger.error(f"Error processing image: {img_error}", exc_info=True) - - # If we get here, we couldn't process the image - logger.warning("Failed to process image, creating default tensor") - default_tensor = torch.zeros(1, 3, 512, 512) - tensor_cache.image_outputs.put_nowait(default_tensor) - self.execution_complete_event.set() - except Exception as e: - logger.error(f"Error handling binary message: {e}", exc_info=True) - # Set execution complete event to avoid hanging + logger.error(f"Error handling binary message: {e}") self.execution_complete_event.set() async def _execute_prompt(self, prompt_index: int): - """Execute a prompt via the ComfyUI API""" try: # Get the prompt to execute prompt = self.current_prompts[prompt_index] - # Ensure all seed values are randomized for every execution - # This forces ComfyUI to not use cached results - for node_id, node in prompt.items(): - if isinstance(node, dict) and "inputs" in node: - if "seed" in node["inputs"]: - # Generate a truly random seed each time - random_seed = random.randint(0, 18446744073709551615) - node["inputs"]["seed"] = random_seed - logger.info(f"Randomized seed to {random_seed} for node {node_id}") - - # Also randomize noise_seed if present - if "noise_seed" in node["inputs"]: - noise_seed = random.randint(0, 18446744073709551615) - node["inputs"]["noise_seed"] = noise_seed - logger.info(f"Randomized noise_seed to {noise_seed} for node {node_id}") - - # Add a timestamp parameter to each node to prevent caching - # This is a "hidden" trick to force ComfyUI to consider each execution unique - timestamp = int(time.time() * 1000) # millisecond timestamp - for node_id, node in prompt.items(): - if isinstance(node, dict) and "inputs" in node: - # Add a timestamp parameter to ETN_LoadImageBase64 nodes - if node.get("class_type") in ["ETN_LoadImageBase64", "LoadImageBase64"]: - # Add a unique cache-busting parameter - node["inputs"]["_timestamp"] = timestamp - logger.info(f"Added timestamp {timestamp} to node {node_id}") - # Check if we have a frame waiting to be processed if not tensor_cache.image_inputs.empty(): logger.info("Found tensor in input queue, preparing for API") - # Get the frame from the cache - make sure to get the most recent frame + # Get the most recent frame only + frame_or_tensor = None while not tensor_cache.image_inputs.empty(): frame_or_tensor = 
tensor_cache.image_inputs.get_nowait() - # Find ETN_LoadImageBase64 nodes + # Find ETN_LoadImageBase64 nodes first load_image_nodes = [] for node_id, node in prompt.items(): - if isinstance(node, dict) and node.get("class_type") == "ETN_LoadImageBase64": + if isinstance(node, dict) and node.get("class_type") in ["ETN_LoadImageBase64", "LoadImageBase64"]: load_image_nodes.append(node_id) if not load_image_nodes: - # Also check for regular LoadImageBase64 nodes as fallback - for node_id, node in prompt.items(): - if isinstance(node, dict) and node.get("class_type") == "LoadImageBase64": - load_image_nodes.append(node_id) - - if not load_image_nodes: - logger.warning("No ETN_LoadImageBase64 or LoadImageBase64 nodes found in the prompt") - self.execution_complete_event.set() # Signal completion + logger.warning("No LoadImageBase64 nodes found in the prompt") + self.execution_complete_event.set() return - else: - # Convert the frame/tensor to base64 and include directly in the prompt - try: - # Get the actual tensor data - handle different input types - tensor = None - - # Check if it's a PyAV VideoFrame with preprocessed tensor in side_data - if hasattr(frame_or_tensor, 'side_data') and hasattr(frame_or_tensor.side_data, 'input'): - tensor = frame_or_tensor.side_data.input - logger.info(f"Using preprocessed tensor from frame.side_data.input with shape {tensor.shape}") - # Check if it's a PyTorch tensor - elif isinstance(frame_or_tensor, torch.Tensor): - tensor = frame_or_tensor - logger.info(f"Using tensor directly with shape {tensor.shape}") - # Check if it's a numpy array - elif isinstance(frame_or_tensor, np.ndarray): - tensor = torch.from_numpy(frame_or_tensor).float() - logger.info(f"Converted numpy array to tensor with shape {tensor.shape}") - else: - # If it's a PyAV frame without preprocessed data, convert it - try: - if hasattr(frame_or_tensor, 'to_ndarray'): - frame_np = frame_or_tensor.to_ndarray(format="rgb24").astype(np.float32) / 255.0 - tensor = torch.from_numpy(frame_np).unsqueeze(0) - logger.info(f"Converted PyAV frame to tensor with shape {tensor.shape}") - else: - logger.error(f"Unsupported frame type: {type(frame_or_tensor)}") - self.execution_complete_event.set() - return - except Exception as e: - logger.error(f"Error converting frame to tensor: {e}") - self.execution_complete_event.set() - return - - if tensor is None: - logger.error("Failed to get valid tensor data from input") - self.execution_complete_event.set() - return - - # Now process the tensor (which should be a proper PyTorch tensor) - # Ensure it's a tensor on CPU and detached + + # Process the tensor ONLY if we have nodes to send it to + try: + # Get the actual tensor data - handle different input types + tensor = None + + # Handle different input types efficiently + if hasattr(frame_or_tensor, 'side_data') and hasattr(frame_or_tensor.side_data, 'input'): + tensor = frame_or_tensor.side_data.input + elif isinstance(frame_or_tensor, torch.Tensor): + tensor = frame_or_tensor + elif isinstance(frame_or_tensor, np.ndarray): + tensor = torch.from_numpy(frame_or_tensor).float() + elif hasattr(frame_or_tensor, 'to_ndarray'): + frame_np = frame_or_tensor.to_ndarray(format="rgb24").astype(np.float32) / 255.0 + tensor = torch.from_numpy(frame_np).unsqueeze(0) + + if tensor is None: + logger.error("Failed to get valid tensor data from input") + self.execution_complete_event.set() + return + + # Process tensor format only once + with torch.no_grad(): tensor = tensor.detach().cpu().float() # Handle different formats @@ 
-553,65 +429,52 @@ async def _execute_prompt(self, prompt_index: int): if len(tensor.shape) == 3 and tensor.shape[2] == 3: # HWC format tensor = tensor.permute(2, 0, 1) # Convert to CHW - # Convert to PIL image for saving + # Convert to PIL image for base64 ONLY ONCE tensor_np = (tensor.permute(1, 2, 0) * 255).clamp(0, 255).numpy().astype(np.uint8) img = Image.fromarray(tensor_np) - # Save as PNG to BytesIO and convert to base64 string + # Convert to base64 ONCE for all nodes buffer = BytesIO() img.save(buffer, format="PNG") buffer.seek(0) - - # Encode as base64 - for ETN_LoadImageBase64, we need the raw base64 string img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8') - logger.info(f"Created base64 string of length: {len(img_base64)}") - - # Update all ETN_LoadImageBase64 nodes with the base64 data - for node_id in load_image_nodes: - prompt[node_id]["inputs"]["image"] = img_base64 - # Add a small random suffix to image data to prevent caching - rand_suffix = str(random.randint(1, 1000000)) - prompt[node_id]["inputs"]["_cache_buster"] = rand_suffix - logger.info(f"Updated node {node_id} with base64 string and cache buster {rand_suffix}") - except Exception as e: - logger.error(f"Error converting tensor to base64: {e}") - # Signal execution complete in case of error - self.execution_complete_event.set() - return + # Update all nodes with the SAME base64 string + timestamp = int(time.time() * 1000) + for node_id in load_image_nodes: + prompt[node_id]["inputs"]["image"] = img_base64 + prompt[node_id]["inputs"]["_timestamp"] = timestamp + # Use timestamp as cache buster instead of random number + prompt[node_id]["inputs"]["_cache_buster"] = str(timestamp) + + except Exception as e: + logger.error(f"Error converting tensor to base64: {e}") + self.execution_complete_event.set() + return + + # Execute the prompt via API + async with aiohttp.ClientSession() as session: + api_url = f"{self.api_base_url}/prompt" + payload = { + "prompt": prompt, + "client_id": self.client_id + } + + async with session.post(api_url, json=payload) as response: + if response.status == 200: + result = await response.json() + self._prompt_id = result.get("prompt_id") + self.execution_started = True + else: + error_text = await response.text() + logger.error(f"Error queueing prompt: {response.status} - {error_text}") + self.execution_complete_event.set() else: logger.info("No tensor in input queue, skipping prompt execution") - self.execution_complete_event.set() # Signal completion - return - - # Execute the prompt via API - async with aiohttp.ClientSession() as session: - api_url = f"{self.api_base_url}/prompt" - payload = { - "prompt": prompt, - "client_id": self.client_id - } - - # Send the request - logger.info(f"Sending prompt to {api_url}") - async with session.post(api_url, json=payload) as response: - if response.status == 200: - result = await response.json() - self._prompt_id = result.get("prompt_id") - logger.info(f"Prompt queued with ID: {self._prompt_id}") - self.execution_started = True - else: - error_text = await response.text() - logger.error(f"Error queueing prompt: {response.status} - {error_text}") - # Signal execution complete in case of error - self.execution_complete_event.set() + self.execution_complete_event.set() - except aiohttp.ClientError as e: - logger.error(f"Client error queueing prompt: {e}") - self.execution_complete_event.set() except Exception as e: logger.error(f"Error executing prompt: {e}") - # Signal execution complete in case of error 
self.execution_complete_event.set() async def _send_tensor_via_websocket(self, tensor): diff --git a/src/comfystream/utils_api.py b/src/comfystream/utils_api.py index 8b93ec7f..5a932ef1 100644 --- a/src/comfystream/utils_api.py +++ b/src/comfystream/utils_api.py @@ -15,7 +15,6 @@ def create_load_tensor_node(): "_meta": {"title": "Load Tensor (API)"}, } - def create_save_tensor_node(inputs: Dict[Any, Any]): """Create a SaveTensorAPI node with proper input formatting""" # Make sure images input is properly formatted [node_id, output_index] @@ -51,79 +50,29 @@ def convert_prompt(prompt): random_seed = random.randint(0, 18446744073709551615) node["inputs"]["seed"] = random_seed print(f"Set random seed {random_seed} for node {key}") + + + # Replace LoadImage with LoadImageBase64 + for key, node in prompt.items(): + if node.get("class_type") == "LoadImage": + node["class_type"] = "LoadImageBase64" + + # Replace SaveImage/PreviewImage with SendImageWebsocket + for key, node in prompt.items(): + if node.get("class_type") in ["SaveImage", "PreviewImage"]: + node["class_type"] = "SendImageWebsocket" + # Ensure format is set + if "format" not in node["inputs"]: + node["inputs"]["format"] = "PNG" # Set default format return prompt ''' -def convert_prompt(prompt): +def convert_prompt(prompt: PromptDictInput) -> Prompt: + # Validate the schema + Prompt.validate(prompt) - # Check if this is a ComfyUI web UI format prompt with 'nodes' and 'links' - if isinstance(prompt, dict) and 'nodes' in prompt and 'links' in prompt: - # Convert the web UI prompt format to the API format - api_prompt = {} - - # Process each node - for node in prompt['nodes']: - node_id = str(node['id']) - - # Create a node entry in the API format - api_prompt[node_id] = { - 'class_type': node.get('type', 'Unknown'), - 'inputs': {}, - '_meta': { - 'title': node.get('type', 'Unknown') - } - } - - # Process inputs - if 'inputs' in node: - for input_data in node['inputs']: - input_name = input_data.get('name') - link_id = input_data.get('link') - - if input_name and link_id is not None: - # Find the source of this link - for link in prompt['links']: - if link[0] == link_id: # link ID matches - # Get source node and output slot - source_node_id = str(link[1]) - source_slot = link[3] - - # Add to inputs - api_prompt[node_id]['inputs'][input_name] = [ - source_node_id, - source_slot - ] - break - # If no link found, set to empty value - if input_name not in api_prompt[node_id]['inputs']: - api_prompt[node_id]['inputs'][input_name] = None - - # Process widget values - if 'widgets_values' in node: - for i, widget_value in enumerate(node.get('widgets_values', [])): - # Try to determine widget name from properties or use index - widget_name = f"widget_{i}" - # Add to inputs - api_prompt[node_id]['inputs'][widget_name] = widget_value - - # Use this converted prompt instead - prompt = api_prompt - - # Now process as normal API format prompt prompt = copy.deepcopy(prompt) - - # Set random seeds for any seed nodes - for key, node in prompt.items(): - if not isinstance(node, dict) or "inputs" not in node: - continue - - # Check if this node has a seed input directly - if "seed" in node.get("inputs", {}): - # Generate a random seed (same range as JavaScript's Math.random() * 18446744073709552000) - random_seed = random.randint(0, 18446744073709551615) - node["inputs"]["seed"] = random_seed - print(f"Set random seed {random_seed} for node {key}") num_primary_inputs = 0 num_inputs = 0 @@ -134,112 +83,54 @@ def convert_prompt(prompt): "LoadImage": [], 
"PreviewImage": [], "SaveImage": [], - "LoadTensor": [], - "SaveTensor": [], - "LoadImageBase64": [], - "LoadTensorAPI": [], - "SaveTensorAPI": [], } for key, node in prompt.items(): - if not isinstance(node, dict): - continue + class_type = node.get("class_type") + + # Collect keys for nodes that might need to be replaced + if class_type in keys: + keys[class_type].append(key) - class_type = node.get("class_type", "") - - # Track primary input and output nodes - if class_type in ["PrimaryInput", "PrimaryInputImage"]: + # Count inputs and outputs + if class_type == "PrimaryInputLoadImage": num_primary_inputs += 1 - keys["PrimaryInputLoadImage"].append(key) - elif class_type in ["LoadImage", "LoadTensor", "LoadAudioTensor", "LoadImageBase64", "LoadTensorAPI"]: + elif class_type in ["LoadImage", "LoadTensor", "LoadAudioTensor"]: num_inputs += 1 - if class_type == "LoadImage": - keys["LoadImage"].append(key) - elif class_type == "LoadTensor": - keys["LoadTensor"].append(key) - elif class_type == "LoadImageBase64": - keys["LoadImageBase64"].append(key) - elif class_type == "LoadTensorAPI": - keys["LoadTensorAPI"].append(key) - elif class_type in ["PreviewImage", "SaveImage", "SaveTensor", "SaveAudioTensor", "SendImageWebSocket", "SaveTensorAPI"]: + elif class_type in ["PreviewImage", "SaveImage", "SaveTensor", "SaveAudioTensor"]: num_outputs += 1 - if class_type == "PreviewImage": - keys["PreviewImage"].append(key) - elif class_type == "SaveImage": - keys["SaveImage"].append(key) - elif class_type == "SaveTensor": - keys["SaveTensor"].append(key) - elif class_type == "SaveTensorAPI": - keys["SaveTensorAPI"].append(key) - - print(f"Found {num_primary_inputs} primary inputs, {num_inputs} inputs, {num_outputs} outputs") - - # Set up connection for video feeds by replacing LoadImage with LoadImageBase64 - if num_inputs == 0 and num_primary_inputs == 0: - # Add a LoadTensorAPI node - new_key = "999990" - prompt[new_key] = create_load_tensor_node() - keys["LoadTensorAPI"].append(new_key) - print("Added LoadTensorAPI node for tensor input") - elif len(keys["LoadTensor"]) > 0 and len(keys["LoadTensorAPI"]) == 0: - # Replace LoadTensor with LoadTensorAPI if found - for key in keys["LoadTensor"]: - prompt[key] = create_load_tensor_node() - keys["LoadTensorAPI"].append(key) - print("Replaced LoadTensor with LoadTensorAPI") - - # Set up connection for output if needed + + # Only handle single primary input + if num_primary_inputs > 1: + raise Exception("too many primary inputs in prompt") + + # If there are no primary inputs, only handle single input + if num_primary_inputs == 0 and num_inputs > 1: + raise Exception("too many inputs in prompt") + + # Only handle single output for now + if num_outputs > 1: + raise Exception("too many outputs in prompt") + + if num_primary_inputs + num_inputs == 0: + raise Exception("missing input") + if num_outputs == 0: - # Find nodes that produce images - image_output_nodes = [] - for key, node in prompt.items(): - if isinstance(node, dict): - class_type = node.get("class_type", "") - # Look for nodes that typically output images - if any(output_type in class_type.lower() for output_type in ["vae", "decode", "img", "image", "upscale", "sample"]): - image_output_nodes.append(key) - # Also check if the node's RETURN_TYPES includes IMAGE - elif "outputs" in node and isinstance(node["outputs"], dict): - for output_name, output_type in node["outputs"].items(): - if "IMAGE" in output_type: - image_output_nodes.append(key) - break - - # If we found image output nodes, connect 
SaveTensorAPI to them - if image_output_nodes: - for i, node_key in enumerate(image_output_nodes): - new_key = f"999991_{i}" - prompt[new_key] = create_save_tensor_node({"images": [node_key, 0]}) - print(f"Added SaveTensorAPI node connected to {node_key}") - else: - # Try to find the last node in the chain as fallback - last_node = None - max_id = -1 - for key, node in prompt.items(): - if isinstance(node, dict): - try: - node_id = int(key) - if node_id > max_id: - max_id = node_id - last_node = key - except ValueError: - pass - - if last_node: - # Add a SaveTensorAPI node - new_key = "999991" - prompt[new_key] = create_save_tensor_node({"images": [last_node, 0]}) - print(f"Added SaveTensorAPI node connected to {last_node}") - else: - print("Warning: Could not find a suitable node to connect SaveTensorAPI to") - - # Make sure all SaveTensorAPI nodes have proper configuration - for key, node in prompt.items(): - if isinstance(node, dict) and node.get("class_type") == "SaveTensorAPI": - # Ensure format is set to PNG for optimal compatibility - if "inputs" in node: - node["inputs"]["format"] = "png" - - # Return the modified prompt + raise Exception("missing output") + + # Replace nodes + for key in keys["PrimaryInputLoadImage"]: + prompt[key] = create_load_tensor_node() + + if num_primary_inputs == 0 and len(keys["LoadImage"]) == 1: + prompt[keys["LoadImage"][0]] = create_load_tensor_node() + + for key in keys["PreviewImage"] + keys["SaveImage"]: + node = prompt[key] + prompt[key] = create_save_tensor_node(node["inputs"]) + + # Validate the processed prompt input + prompt = Prompt.validate(prompt) + return prompt ''' \ No newline at end of file From d66dac4dbbdc94d7ff7f394ee9b6b0aae78aeca0 Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 25 Mar 2025 15:48:50 -0400 Subject: [PATCH 04/14] Preliminary work on multi-Comfy server inference. 
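
The multi-server work dispatches incoming video frames across several
ComfyStreamClient instances and remembers which client received each frame so
the collector task can match processed outputs back to their source. A
stripped-down sketch of that round-robin bookkeeping (illustrative only; names
simplified, not part of the diff):

    import itertools
    from typing import Any, Dict, List

    class RoundRobinDispatcher:
        """Toy version of the frame -> client bookkeeping used by the pipeline."""

        def __init__(self, clients: List[Any]):
            self.clients = clients
            self._rotation = itertools.cycle(range(len(clients)))
            self._next_frame_id = 1
            self.frame_to_client: Dict[int, int] = {}

        def dispatch(self, frame: Any) -> int:
            # Tag the frame with a monotonically increasing id, pick the next client
            # in rotation, and record the mapping so the result can be re-ordered later.
            frame_id = self._next_frame_id
            self._next_frame_id += 1
            client_index = next(self._rotation)
            self.frame_to_client[frame_id] = client_index
            self.clients[client_index].put_video_input(frame)
            return frame_id

Audio is not distributed yet; the pipeline keeps routing audio frames to the
first client only.
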
--- server/app_api.py | 14 ++- server/pipeline_api.py | 246 ++++++++++++++++++++++++++++++++--------- 2 files changed, 207 insertions(+), 53 deletions(-) diff --git a/server/app_api.py b/server/app_api.py index 6158a540..94fc36af 100644 --- a/server/app_api.py +++ b/server/app_api.py @@ -26,7 +26,6 @@ from twilio.rest import Client from utils import patch_loop_datagram, add_prefix_to_app_routes, FPSMeter from metrics import MetricsManager, StreamStatsManager -import time logger = logging.getLogger(__name__) logging.getLogger("aiortc.rtcrtpsender").setLevel(logging.WARNING) @@ -345,7 +344,11 @@ async def on_startup(app: web.Application): patch_loop_datagram(app["media_ports"]) app["pipeline"] = Pipeline( - cwd=app["workspace"], disable_cuda_malloc=True, gpu_only=True, preview_method='none' + config_path=app["config_file"], + cwd=app["workspace"], + disable_cuda_malloc=True, + gpu_only=True, + preview_method='none' ) app["pcs"] = set() app["video_tracks"] = {} @@ -374,6 +377,12 @@ async def on_shutdown(app: web.Application): choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], help="Set the logging level", ) + parser.add_argument( + "--config-file", + type=str, + default=None, + help="Path to TOML configuration file for Comfy servers" + ) parser.add_argument( "--monitor", default=False, @@ -397,6 +406,7 @@ async def on_shutdown(app: web.Application): app = web.Application() app["media_ports"] = args.media_ports.split(",") if args.media_ports else None app["workspace"] = args.workspace + app["config_file"] = args.config_file app.on_startup.append(on_startup) app.on_shutdown.append(on_shutdown) diff --git a/server/pipeline_api.py b/server/pipeline_api.py index a6eb1205..6849c265 100644 --- a/server/pipeline_api.py +++ b/server/pipeline_api.py @@ -4,88 +4,214 @@ import asyncio import logging import time +import random +from collections import deque -from typing import Any, Dict, Union, List +from typing import Any, Dict, Union, List, Optional, Deque from comfystream.client_api import ComfyStreamClient +from config import ComfyConfig WARMUP_RUNS = 5 logger = logging.getLogger(__name__) -class Pipeline: - def __init__(self, **kwargs): - self.client = ComfyStreamClient(**kwargs) +class MultiServerPipeline: + def __init__(self, config_path: Optional[str] = None, **kwargs): + # Load server configurations + self.config = ComfyConfig(config_path) + self.servers = self.config.get_servers() + + # Create client for each server + self.clients = [] + for server_config in self.servers: + client_kwargs = kwargs.copy() + client_kwargs.update(server_config) + self.clients.append(ComfyStreamClient(**client_kwargs)) + + logger.info(f"Initialized {len(self.clients)} ComfyUI clients") + self.video_incoming_frames = asyncio.Queue() self.audio_incoming_frames = asyncio.Queue() - + + # Queue for processed frames from all clients + self.processed_video_frames = asyncio.Queue() + + # Track which client gets each frame (round-robin) + self.current_client_index = 0 + self.client_frame_mapping = {} # Maps frame_id -> client_index + + # Buffer to store frames in order of original pts + self.frame_output_buffer: Deque = deque() + + # Audio processing self.processed_audio_buffer = np.array([], dtype=np.int16) self.last_frame_time = 0 - # TODO: Not sure if this is needed - should match to UI selected FPS + # Frame rate limiting self.min_frame_interval = 1/30 # Limit to 30 FPS + + # Create background task for collecting processed frames + self.running = True + self.collector_task = 
asyncio.create_task(self._collect_processed_frames()) + + async def _collect_processed_frames(self): + """Background task to collect processed frames from all clients""" + try: + while self.running: + for i, client in enumerate(self.clients): + try: + # Non-blocking check if client has output ready + if hasattr(client, '_prompt_id') and client._prompt_id is not None: + # Get frame without waiting + try: + # Use wait_for with small timeout to avoid blocking + out_tensor = await asyncio.wait_for( + client.get_video_output(), + timeout=0.01 + ) + + # Find which original frame this corresponds to + # (using a simple approach here - could be improved) + # In real implementation, need to track which frames went to which client + frame_ids = [frame_id for frame_id, client_idx in + self.client_frame_mapping.items() if client_idx == i] + + if frame_ids: + # Use the oldest frame ID for this client + frame_id = min(frame_ids) + # Store the processed tensor along with original frame ID for ordering + await self.processed_video_frames.put((frame_id, out_tensor)) + # Remove the mapping + self.client_frame_mapping.pop(frame_id, None) + logger.info(f"Collected processed frame from client {i}, frame_id: {frame_id}") + except asyncio.TimeoutError: + # No frame ready yet, continue + pass + except Exception as e: + logger.error(f"Error collecting frame from client {i}: {e}") + + # Small sleep to avoid CPU spinning + await asyncio.sleep(0.01) + except asyncio.CancelledError: + logger.info("Frame collector task cancelled") + except Exception as e: + logger.error(f"Unexpected error in frame collector: {e}") async def warm_video(self): - """Warm up the video pipeline with dummy frames""" + """Warm up the video pipeline with dummy frames for each client""" logger.info("Warming up video pipeline...") - # Create a properly formatted dummy frame (random color pattern) - # Using standard tensor shape: BCHW [1, 3, 512, 512] + # Create a properly formatted dummy frame tensor = torch.rand(1, 3, 512, 512) # Random values in [0,1] - - # Create a dummy frame and attach the tensor as side_data dummy_frame = av.VideoFrame(width=512, height=512, format="rgb24") dummy_frame.side_data.input = tensor - # Process a few frames for warmup - for i in range(WARMUP_RUNS): - logger.info(f"Video warmup iteration {i+1}/{WARMUP_RUNS}") - self.client.put_video_input(dummy_frame) - await self.client.get_video_output() - + # Warm up each client + warmup_tasks = [] + for i, client in enumerate(self.clients): + warmup_tasks.append(self._warm_client_video(client, i, dummy_frame)) + + # Wait for all warmup tasks to complete + await asyncio.gather(*warmup_tasks) logger.info("Video pipeline warmup complete") + + async def _warm_client_video(self, client, client_index, dummy_frame): + """Warm up a single client""" + logger.info(f"Warming up client {client_index}") + for i in range(WARMUP_RUNS): + logger.info(f"Client {client_index} warmup iteration {i+1}/{WARMUP_RUNS}") + client.put_video_input(dummy_frame) + try: + await asyncio.wait_for(client.get_video_output(), timeout=5.0) + except asyncio.TimeoutError: + logger.warning(f"Timeout waiting for warmup frame from client {client_index}") + except Exception as e: + logger.error(f"Error warming client {client_index}: {e}") async def warm_audio(self): + # For now, only use the first client for audio + if not self.clients: + logger.warning("No clients available for audio warmup") + return + dummy_frame = av.AudioFrame() - dummy_frame.side_data.input = np.random.randint(-32768, 32767, int(48000 * 0.5), 
dtype=np.int16) # TODO: adds a lot of delay if it doesn't match the buffer size, is warmup needed? + dummy_frame.side_data.input = np.random.randint(-32768, 32767, int(48000 * 0.5), dtype=np.int16) dummy_frame.sample_rate = 48000 for _ in range(WARMUP_RUNS): - self.client.put_audio_input(dummy_frame) - await self.client.get_audio_output() + self.clients[0].put_audio_input(dummy_frame) + await self.clients[0].get_audio_output() async def set_prompts(self, prompts: Union[Dict[Any, Any], List[Dict[Any, Any]]]): - if isinstance(prompts, list): - await self.client.set_prompts(prompts) - else: - await self.client.set_prompts([prompts]) + """Set the same prompts for all clients""" + if isinstance(prompts, dict): + prompts = [prompts] + + # Set prompts for each client + tasks = [] + for client in self.clients: + tasks.append(client.set_prompts(prompts)) + + await asyncio.gather(*tasks) + logger.info(f"Set prompts for {len(self.clients)} clients") async def update_prompts(self, prompts: Union[Dict[Any, Any], List[Dict[Any, Any]]]): - if isinstance(prompts, list): - await self.client.update_prompts(prompts) - else: - await self.client.update_prompts([prompts]) + """Update prompts for all clients""" + if isinstance(prompts, dict): + prompts = [prompts] + + # Update prompts for each client + tasks = [] + for client in self.clients: + tasks.append(client.update_prompts(prompts)) + + await asyncio.gather(*tasks) + logger.info(f"Updated prompts for {len(self.clients)} clients") async def put_video_frame(self, frame: av.VideoFrame): + """Distribute video frames among clients using round-robin""" current_time = time.time() if current_time - self.last_frame_time < self.min_frame_interval: return # Skip frame if too soon self.last_frame_time = current_time + + # Generate a unique frame ID + frame_id = int(time.time() * 1000000) # Microseconds as ID + frame.side_data.frame_id = frame_id + + # Preprocess the frame frame.side_data.input = self.video_preprocess(frame) - frame.side_data.skipped = False # Different from LoadTensor, we don't skip frames here - self.client.put_video_input(frame) + frame.side_data.skipped = False + + # Select the next client in round-robin fashion + client_index = self.current_client_index + self.current_client_index = (self.current_client_index + 1) % len(self.clients) + + # Store mapping of which client is processing this frame + self.client_frame_mapping[frame_id] = client_index + + # Send frame to the selected client + self.clients[client_index].put_video_input(frame) + + # Also add to the incoming queue for reference await self.video_incoming_frames.put(frame) + + logger.info(f"Sent frame {frame_id} to client {client_index}") async def put_audio_frame(self, frame: av.AudioFrame): + # For now, only use the first client for audio + if not self.clients: + return + frame.side_data.input = self.audio_preprocess(frame) frame.side_data.skipped = False - self.client.put_audio_input(frame) + self.clients[0].put_audio_input(frame) await self.audio_incoming_frames.put(frame) def audio_preprocess(self, frame: av.AudioFrame) -> Union[torch.Tensor, np.ndarray]: return frame.to_ndarray().ravel().reshape(-1, 2).mean(axis=1).astype(np.int16) - # Works with ComfyUI Native def video_preprocess(self, frame: av.VideoFrame) -> Union[torch.Tensor, np.ndarray]: # Convert directly to tensor, avoiding intermediate numpy array when possible if hasattr(frame, 'to_tensor'): @@ -97,15 +223,7 @@ def video_preprocess(self, frame: av.VideoFrame) -> Union[torch.Tensor, np.ndarr # Normalize to [0,1] range and add 
batch dimension return tensor.float().div(255.0).unsqueeze(0) - - ''' Converts HWC format (height, width, channels) to VideoFrame. - def video_postprocess(self, output: Union[torch.Tensor, np.ndarray]) -> av.VideoFrame: - return av.VideoFrame.from_ndarray( - (output * 255.0).clamp(0, 255).to(dtype=torch.uint8).squeeze(0).cpu().numpy() - ) - ''' - '''Converts BCHW tensor [1,3,H,W] to VideoFrame. Assumes values are in range [0,1].''' def video_postprocess(self, output: Union[torch.Tensor, np.ndarray]) -> av.VideoFrame: return av.VideoFrame.from_ndarray( (output.squeeze(0).permute(1, 2, 0) * 255.0) @@ -121,7 +239,7 @@ def audio_postprocess(self, output: Union[torch.Tensor, np.ndarray]) -> av.Audio async def get_processed_video_frame(self): try: - # Get the frame from the incoming queue first + # Get the frame from the incoming queue first to maintain timing frame = await self.video_incoming_frames.get() # Skip frames if we're falling behind @@ -130,11 +248,11 @@ async def get_processed_video_frame(self): frame.side_data.skipped = True frame = await self.video_incoming_frames.get() logger.info("Skipped older frame to catch up") - - # Get the processed frame from the output queue - out_tensor = await self.client.get_video_output() - # Process only the most recent frame + # Get the processed frame from our output queue + frame_id, out_tensor = await self.processed_video_frames.get() + + # Process the frame processed_frame = self.video_postprocess(out_tensor) processed_frame.pts = frame.pts processed_frame.time_base = frame.time_base @@ -148,10 +266,14 @@ async def get_processed_video_frame(self): return black_frame async def get_processed_audio_frame(self): - # TODO: make it generic to support purely generative audio cases and also add frame skipping + # Only use the first client for audio + if not self.clients: + logger.warning("No clients available for audio processing") + return av.AudioFrame(format='s16', layout='mono', samples=1024) + frame = await self.audio_incoming_frames.get() if frame.samples > len(self.processed_audio_buffer): - out_tensor = await self.client.get_audio_output() + out_tensor = await self.clients[0].get_audio_output() self.processed_audio_buffer = np.concatenate([self.processed_audio_buffer, out_tensor]) out_data = self.processed_audio_buffer[:frame.samples] self.processed_audio_buffer = self.processed_audio_buffer[frame.samples:] @@ -164,9 +286,31 @@ async def get_processed_audio_frame(self): return processed_frame async def get_nodes_info(self) -> Dict[str, Any]: - """Get information about all nodes in the current prompt including metadata.""" - nodes_info = await self.client.get_available_nodes() - return nodes_info + """Get information about nodes from the first client""" + if not self.clients: + return {} + return await self.clients[0].get_available_nodes() async def cleanup(self): - await self.client.cleanup() \ No newline at end of file + """Clean up all clients and background tasks""" + self.running = False + + # Cancel collector task + if hasattr(self, 'collector_task') and not self.collector_task.done(): + self.collector_task.cancel() + try: + await self.collector_task + except asyncio.CancelledError: + pass + + # Clean up all clients + cleanup_tasks = [] + for client in self.clients: + cleanup_tasks.append(client.cleanup()) + + await asyncio.gather(*cleanup_tasks) + logger.info("All clients cleaned up") + + +# For backwards compatibility, maintain the original Pipeline name +Pipeline = MultiServerPipeline \ No newline at end of file From 
d3b0b3c24704d7e146b48de10ca1145cae204171 Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 25 Mar 2025 17:19:27 -0400 Subject: [PATCH 05/14] Work on frame timing and management, added max_frame_wait argument, added config for server management. --- configs/comfy.toml | 13 ++++ requirements.txt | 1 + server/app_api.py | 10 ++++ server/config.py | 45 ++++++++++++++ server/pipeline_api.py | 110 +++++++++++++++++++++++++++++----- src/comfystream/client_api.py | 23 ++++--- 6 files changed, 174 insertions(+), 28 deletions(-) create mode 100644 configs/comfy.toml create mode 100644 server/config.py diff --git a/configs/comfy.toml b/configs/comfy.toml new file mode 100644 index 00000000..5d278541 --- /dev/null +++ b/configs/comfy.toml @@ -0,0 +1,13 @@ +# Configuration for multiple ComfyUI servers + +[[servers]] +host = "127.0.0.1" +port = 8188 +client_id = "client1" + +# Adding more servers: + +# [[servers]] +# host = "127.0.0.1" +# port = 8189 +# client_id = "client2" diff --git a/requirements.txt b/requirements.txt index 4a7e68ad..56fe8b22 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,6 @@ comfyui @ git+https://github.com/hiddenswitch/ComfyUI.git@ce3583ad42c024b8f060d0 aiortc aiohttp toml +tomli twilio prometheus_client diff --git a/server/app_api.py b/server/app_api.py index 94fc36af..b7b56e56 100644 --- a/server/app_api.py +++ b/server/app_api.py @@ -345,6 +345,7 @@ async def on_startup(app: web.Application): app["pipeline"] = Pipeline( config_path=app["config_file"], + max_frame_wait_ms=app["max_frame_wait"], cwd=app["workspace"], disable_cuda_malloc=True, gpu_only=True, @@ -353,6 +354,8 @@ async def on_startup(app: web.Application): app["pcs"] = set() app["video_tracks"] = {} + app["max_frame_wait"] = args.max_frame_wait + async def on_shutdown(app: web.Application): pcs = app["pcs"] @@ -395,6 +398,12 @@ async def on_shutdown(app: web.Application): action="store_true", help="Include stream ID as a label in Prometheus metrics.", ) + parser.add_argument( + "--max-frame-wait", + type=int, + default=500, + help="Maximum time to wait for a frame in milliseconds before dropping it" + ) args = parser.parse_args() logging.basicConfig( @@ -407,6 +416,7 @@ async def on_shutdown(app: web.Application): app["media_ports"] = args.media_ports.split(",") if args.media_ports else None app["workspace"] = args.workspace app["config_file"] = args.config_file + app["max_frame_wait"] = args.max_frame_wait app.on_startup.append(on_startup) app.on_shutdown.append(on_shutdown) diff --git a/server/config.py b/server/config.py new file mode 100644 index 00000000..7f066643 --- /dev/null +++ b/server/config.py @@ -0,0 +1,45 @@ +import tomli +import logging +from typing import List, Dict, Any, Optional + +logger = logging.getLogger(__name__) + +class ComfyConfig: + def __init__(self, config_path: Optional[str] = None): + self.servers = [] + self.config_path = config_path + if config_path: + self.load_config(config_path) + else: + # Default to single local server if no config provided + self.servers = [{"host": "127.0.0.1", "port": 8188}] + + def load_config(self, config_path: str): + """Load server configuration from TOML file""" + try: + with open(config_path, "rb") as f: + config = tomli.load(f) + + # Extract server configurations + if "servers" in config: + self.servers = config["servers"] + logger.info(f"Loaded {len(self.servers)} server configurations") + else: + logger.warning("No servers defined in config, using default") + self.servers = [{"host": "127.0.0.1", "port": 8198}] + + # Validate 
each server has required fields + for i, server in enumerate(self.servers): + if "host" not in server or "port" not in server: + logger.warning(f"Server {i} missing host or port, using defaults") + server["host"] = server.get("host", "127.0.0.1") + server["port"] = server.get("port", 8198) + + except Exception as e: + logger.error(f"Error loading config from {config_path}: {e}") + # Fall back to default server + self.servers = [{"host": "127.0.0.1", "port": 8198}] + + def get_servers(self) -> List[Dict[str, Any]]: + """Return list of server configurations""" + return self.servers \ No newline at end of file diff --git a/server/pipeline_api.py b/server/pipeline_api.py index 6849c265..d2aedf51 100644 --- a/server/pipeline_api.py +++ b/server/pipeline_api.py @@ -5,7 +5,7 @@ import logging import time import random -from collections import deque +from collections import deque, OrderedDict from typing import Any, Dict, Union, List, Optional, Deque from comfystream.client_api import ComfyStreamClient @@ -16,7 +16,7 @@ class MultiServerPipeline: - def __init__(self, config_path: Optional[str] = None, **kwargs): + def __init__(self, config_path: Optional[str] = None, max_frame_wait_ms: int = 500, **kwargs): # Load server configurations self.config = ComfyConfig(config_path) self.servers = self.config.get_servers() @@ -40,8 +40,10 @@ def __init__(self, config_path: Optional[str] = None, **kwargs): self.current_client_index = 0 self.client_frame_mapping = {} # Maps frame_id -> client_index - # Buffer to store frames in order of original pts - self.frame_output_buffer: Deque = deque() + # Frame ordering and timing + self.max_frame_wait_ms = max_frame_wait_ms # Max time to wait for a frame before dropping + self.next_expected_frame_id = None # Track expected frame ID + self.ordered_frames = OrderedDict() # Buffer for ordering frames (frame_id -> (timestamp, tensor)) # Audio processing self.processed_audio_buffer = np.array([], dtype=np.int16) @@ -71,16 +73,17 @@ async def _collect_processed_frames(self): ) # Find which original frame this corresponds to - # (using a simple approach here - could be improved) - # In real implementation, need to track which frames went to which client frame_ids = [frame_id for frame_id, client_idx in self.client_frame_mapping.items() if client_idx == i] if frame_ids: # Use the oldest frame ID for this client frame_id = min(frame_ids) - # Store the processed tensor along with original frame ID for ordering - await self.processed_video_frames.put((frame_id, out_tensor)) + + # Store frame with timestamp for ordering + timestamp = time.time() + await self._add_frame_to_ordered_buffer(frame_id, timestamp, out_tensor) + # Remove the mapping self.client_frame_mapping.pop(frame_id, None) logger.info(f"Collected processed frame from client {i}, frame_id: {frame_id}") @@ -90,6 +93,9 @@ async def _collect_processed_frames(self): except Exception as e: logger.error(f"Error collecting frame from client {i}: {e}") + # Check for frames that have waited too long + await self._check_frame_timeouts() + # Small sleep to avoid CPU spinning await asyncio.sleep(0.01) except asyncio.CancelledError: @@ -97,6 +103,70 @@ async def _collect_processed_frames(self): except Exception as e: logger.error(f"Unexpected error in frame collector: {e}") + async def _add_frame_to_ordered_buffer(self, frame_id, timestamp, tensor): + """Add a processed frame to the ordered buffer""" + self.ordered_frames[frame_id] = (timestamp, tensor) + + # If this is the first frame, set the next expected frame ID + if 
self.next_expected_frame_id is None: + self.next_expected_frame_id = frame_id + + # Check if we can release any frames now + await self._release_ordered_frames() + + async def _release_ordered_frames(self): + """Process ordered frames and put them in the output queue""" + # If we don't have a next expected frame yet, can't do anything + if self.next_expected_frame_id is None: + return + + # Check if the next expected frame is in our buffer + while self.ordered_frames and self.next_expected_frame_id in self.ordered_frames: + # Get the frame + timestamp, tensor = self.ordered_frames.pop(self.next_expected_frame_id) + + # Put it in the output queue + await self.processed_video_frames.put((self.next_expected_frame_id, tensor)) + logger.info(f"Released frame {self.next_expected_frame_id} to output queue") + + # Update the next expected frame ID to the next sequential ID if possible + # (or the lowest frame ID in our buffer) + if self.ordered_frames: + self.next_expected_frame_id = min(self.ordered_frames.keys()) + else: + # If no more frames, keep the last ID + 1 as next expected + self.next_expected_frame_id += 1 + + async def _check_frame_timeouts(self): + """Check for frames that have waited too long and handle them""" + if not self.ordered_frames or self.next_expected_frame_id is None: + return + + current_time = time.time() + + # If the next expected frame has timed out, skip it and move on + if self.next_expected_frame_id in self.ordered_frames: + timestamp, _ = self.ordered_frames[self.next_expected_frame_id] + wait_time_ms = (current_time - timestamp) * 1000 + + if wait_time_ms > self.max_frame_wait_ms: + logger.warning(f"Frame {self.next_expected_frame_id} exceeded max wait time, releasing anyway") + await self._release_ordered_frames() + + # Check if we're missing the next expected frame and it's been too long + elif self.ordered_frames: + # The next frame we're expecting isn't in the buffer + # Check how long we've been waiting since the oldest frame in the buffer + oldest_frame_id = min(self.ordered_frames.keys()) + oldest_timestamp, _ = self.ordered_frames[oldest_frame_id] + wait_time_ms = (current_time - oldest_timestamp) * 1000 + + # If we've waited too long, skip the missing frame(s) + if wait_time_ms > self.max_frame_wait_ms: + logger.warning(f"Missing frame {self.next_expected_frame_id}, skipping to {oldest_frame_id}") + self.next_expected_frame_id = oldest_frame_id + await self._release_ordered_frames() + async def warm_video(self): """Warm up the video pipeline with dummy frames for each client""" logger.info("Warming up video pipeline...") @@ -176,8 +246,13 @@ async def put_video_frame(self, frame: av.VideoFrame): self.last_frame_time = current_time - # Generate a unique frame ID - frame_id = int(time.time() * 1000000) # Microseconds as ID + # Generate a unique frame ID - use sequential IDs for better ordering + if not hasattr(self, 'next_frame_id'): + self.next_frame_id = 1 + + frame_id = self.next_frame_id + self.next_frame_id += 1 + frame.side_data.frame_id = frame_id # Preprocess the frame @@ -195,7 +270,7 @@ async def put_video_frame(self, frame: av.VideoFrame): self.clients[client_index].put_video_input(frame) # Also add to the incoming queue for reference - await self.video_incoming_frames.put(frame) + await self.video_incoming_frames.put((frame_id, frame)) logger.info(f"Sent frame {frame_id} to client {client_index}") @@ -239,18 +314,21 @@ def audio_postprocess(self, output: Union[torch.Tensor, np.ndarray]) -> av.Audio async def get_processed_video_frame(self): try: 
- # Get the frame from the incoming queue first to maintain timing - frame = await self.video_incoming_frames.get() + # Get the original frame from the incoming queue first to maintain timing + frame_id, frame = await self.video_incoming_frames.get() # Skip frames if we're falling behind while not self.video_incoming_frames.empty(): # Get newer frame and mark old one as skipped frame.side_data.skipped = True - frame = await self.video_incoming_frames.get() - logger.info("Skipped older frame to catch up") + frame_id, frame = await self.video_incoming_frames.get() + logger.info(f"Skipped older frame {frame_id} to catch up") # Get the processed frame from our output queue - frame_id, out_tensor = await self.processed_video_frames.get() + processed_frame_id, out_tensor = await self.processed_video_frames.get() + + if processed_frame_id != frame_id: + logger.warning(f"Frame ID mismatch: expected {frame_id}, got {processed_frame_id}") # Process the frame processed_frame = self.video_postprocess(out_tensor) diff --git a/src/comfystream/client_api.py b/src/comfystream/client_api.py index 624de56e..40801205 100644 --- a/src/comfystream/client_api.py +++ b/src/comfystream/client_api.py @@ -84,7 +84,6 @@ async def run_prompt(self, prompt_index: int): # Always set execution complete at start to allow first frame to be processed self.execution_complete_event.set() - logger.info("Setting execution_complete_event to TRUE at start") try: while True: @@ -97,16 +96,15 @@ async def run_prompt(self, prompt_index: int): if self.execution_complete_event.is_set(): # Reset execution state for next frame self.execution_complete_event.clear() - logger.info("Setting execution_complete_event to FALSE before executing prompt") # Queue the prompt with the current frame await self._execute_prompt(prompt_index) # Wait for execution completion with timeout try: - logger.info("Waiting for execution to complete (max 10 seconds)...") + # logger.info("Waiting for execution to complete (max 10 seconds)...") await asyncio.wait_for(self.execution_complete_event.wait(), timeout=10.0) - logger.info("Execution complete, ready for next frame") + # logger.info("Execution complete, ready for next frame") except asyncio.TimeoutError: logger.error("Timeout waiting for execution, forcing continuation") self.execution_complete_event.set() @@ -379,7 +377,7 @@ async def _execute_prompt(self, prompt_index: int): # Check if we have a frame waiting to be processed if not tensor_cache.image_inputs.empty(): - logger.info("Found tensor in input queue, preparing for API") + # logger.info("Found tensor in input queue, preparing for API") # Get the most recent frame only frame_or_tensor = None while not tensor_cache.image_inputs.empty(): @@ -514,15 +512,16 @@ async def _send_tensor_via_websocket(self, tensor): # Check tensor dimensions and log detailed info logger.info(f"Original tensor for WS: shape={tensor.shape}, min={tensor.min().item():.4f}, max={tensor.max().item():.4f}") - # CRITICAL FIX: The issue is with the shape - no need to resize if dimensions are fine - if tensor.shape[1] < 64 or tensor.shape[2] < 64: - logger.warning(f"Tensor dimensions too small: {tensor.shape}. 
Resizing to 512x512") + # Always ensure consistent 512x512 dimensions + ''' + if tensor.shape[1] != 512 or tensor.shape[2] != 512: + logger.info(f"Resizing tensor from {tensor.shape} to standard 512x512") import torch.nn.functional as F tensor = tensor.unsqueeze(0) # Add batch dimension for interpolate tensor = F.interpolate(tensor, size=(512, 512), mode='bilinear', align_corners=False) tensor = tensor.squeeze(0) # Remove batch dimension after resize - logger.info(f"Resized tensor to: {tensor.shape}") - + ''' + # Check for NaN or Inf values if torch.isnan(tensor).any() or torch.isinf(tensor).any(): logger.warning("Tensor contains NaN or Inf values! Replacing with zeros.") @@ -648,9 +647,9 @@ def put_audio_input(self, frame): async def get_video_output(self): """Get processed video frame from tensor cache""" - logger.info("Waiting for processed tensor from output queue") + # logger.info("Waiting for processed tensor from output queue") result = await tensor_cache.image_outputs.get() - logger.info(f"Got processed tensor from output queue: shape={result.shape if hasattr(result, 'shape') else 'unknown'}") + # logger.info(f"Got processed tensor from output queue: shape={result.shape if hasattr(result, 'shape') else 'unknown'}") return result async def get_audio_output(self): From 63014e3f4e6a0d6c8684589053297d9b74facc0c Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 25 Mar 2025 17:37:31 -0400 Subject: [PATCH 06/14] Cleaned up prompt manipulation with custom nodes. --- src/comfystream/utils_api.py | 93 +++++++++++++++++++++++++++++++----- 1 file changed, 81 insertions(+), 12 deletions(-) diff --git a/src/comfystream/utils_api.py b/src/comfystream/utils_api.py index 5a932ef1..d207f4e3 100644 --- a/src/comfystream/utils_api.py +++ b/src/comfystream/utils_api.py @@ -15,6 +15,15 @@ def create_load_tensor_node(): "_meta": {"title": "Load Tensor (API)"}, } +def create_load_image_node(): + return { + "inputs": { + "image": "" # Should be "image" not "image_data" to match LoadImageBase64 + }, + "class_type": "LoadImageBase64", + "_meta": {"title": "Load Image Base64 (ComfyStream)"}, + } + def create_save_tensor_node(inputs: Dict[Any, Any]): """Create a SaveTensorAPI node with proper input formatting""" # Make sure images input is properly formatted [node_id, output_index] @@ -35,10 +44,38 @@ def create_save_tensor_node(inputs: Dict[Any, Any]): "_meta": {"title": "Save Tensor (API)"}, } -def convert_prompt(prompt): +def create_save_image_node(inputs: Dict[Any, Any]): + # Get the correct image input reference + images_input = inputs.get("images", inputs.get("image")) + + # If not properly formatted, use default + if not images_input: + images_input = ["", 0] # Default empty value + + return { + "inputs": { + "images": images_input, + "format": "PNG" # Default format + }, + "class_type": "SendImageWebsocket", + "_meta": {"title": "Send Image Websocket (ComfyStream)"}, + } +def convert_prompt(prompt): logging.info("Converting prompt: %s", prompt) + # Initialize counters + num_primary_inputs = 0 + num_inputs = 0 + num_outputs = 0 + + keys = { + "PrimaryInputLoadImage": [], + "LoadImage": [], + "PreviewImage": [], + "SaveImage": [], + } + # Set random seeds for any seed nodes for key, node in prompt.items(): if not isinstance(node, dict) or "inputs" not in node: @@ -50,20 +87,52 @@ def convert_prompt(prompt): random_seed = random.randint(0, 18446744073709551615) node["inputs"]["seed"] = random_seed print(f"Set random seed {random_seed} for node {key}") + + for key, node in prompt.items(): + 
class_type = node.get("class_type") + # Collect keys for nodes that might need to be replaced + if class_type in keys: + keys[class_type].append(key) - # Replace LoadImage with LoadImageBase64 - for key, node in prompt.items(): - if node.get("class_type") == "LoadImage": - node["class_type"] = "LoadImageBase64" + # Count inputs and outputs + if class_type == "PrimaryInputLoadImage": + num_primary_inputs += 1 + elif class_type in ["LoadImage", "LoadImageBase64"]: + num_inputs += 1 + elif class_type in ["PreviewImage", "SaveImage", "SendImageWebsocket"]: + num_outputs += 1 - # Replace SaveImage/PreviewImage with SendImageWebsocket - for key, node in prompt.items(): - if node.get("class_type") in ["SaveImage", "PreviewImage"]: - node["class_type"] = "SendImageWebsocket" - # Ensure format is set - if "format" not in node["inputs"]: - node["inputs"]["format"] = "PNG" # Set default format + # Only handle single primary input + if num_primary_inputs > 1: + raise Exception("too many primary inputs in prompt") + + # If there are no primary inputs, only handle single input + if num_primary_inputs == 0 and num_inputs > 1: + raise Exception("too many inputs in prompt") + + # Only handle single output for now + if num_outputs > 1: + raise Exception("too many outputs in prompt") + + if num_primary_inputs + num_inputs == 0: + raise Exception("missing input") + + if num_outputs == 0: + raise Exception("missing output") + + # Replace nodes with proper implementations + for key in keys["PrimaryInputLoadImage"]: + prompt[key] = create_load_image_node() + + if num_primary_inputs == 0 and len(keys["LoadImage"]) == 1: + prompt[keys["LoadImage"][0]] = create_load_image_node() + + for key in keys["PreviewImage"] + keys["SaveImage"]: + node = prompt[key] + prompt[key] = create_save_image_node(node["inputs"]) + + # TODO: Validate the processed prompt input return prompt From 15b51a8283369080f1fdbd6af33b5ac3e460e1ea Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 25 Mar 2025 18:40:17 -0400 Subject: [PATCH 07/14] Added frame tracking, add frame timing stability, added mismatched frame size handling, commented out some logging. 
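
The frame tracking in this patch rests on a small piece of bookkeeping in ComfyStreamClient:
each queued prompt remembers which source frame produced it, so the binary output arriving
later over the websocket can be matched back to that frame. A rough sketch of the flow,
using the names introduced in the diff below (a summary only, not additional code to apply;
the real handler uses explicit hasattr checks rather than a single dict lookup):

    # when a frame is pulled from tensor_cache.image_inputs
    frame_id = frame.side_data.frame_id            # assigned by the pipeline
    self._current_frame_id = frame_id

    # after POST /prompt succeeds
    self._frame_id_mapping[self._prompt_id] = frame_id

    # when the processed image arrives on the websocket
    frame_id = self._frame_id_mapping.get(self._prompt_id, self._current_frame_id)
    tensor_cache.image_outputs.put_nowait((frame_id, tensor))
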
--- server/pipeline_api.py | 51 ++++++++----- src/comfystream/client_api.py | 136 ++++++++++++++++++++++++++-------- 2 files changed, 136 insertions(+), 51 deletions(-) diff --git a/server/pipeline_api.py b/server/pipeline_api.py index d2aedf51..1ff1826a 100644 --- a/server/pipeline_api.py +++ b/server/pipeline_api.py @@ -67,26 +67,36 @@ async def _collect_processed_frames(self): # Get frame without waiting try: # Use wait_for with small timeout to avoid blocking - out_tensor = await asyncio.wait_for( + result = await asyncio.wait_for( client.get_video_output(), timeout=0.01 ) - # Find which original frame this corresponds to - frame_ids = [frame_id for frame_id, client_idx in - self.client_frame_mapping.items() if client_idx == i] - - if frame_ids: - # Use the oldest frame ID for this client - frame_id = min(frame_ids) - - # Store frame with timestamp for ordering - timestamp = time.time() - await self._add_frame_to_ordered_buffer(frame_id, timestamp, out_tensor) + # Check if result is already a tuple with frame_id + if isinstance(result, tuple) and len(result) == 2: + frame_id, out_tensor = result + # logger.info(f"Got result with embedded frame_id: {frame_id}") + else: + out_tensor = result + # Find which original frame this corresponds to using our mapping + frame_ids = [frame_id for frame_id, client_idx in + self.client_frame_mapping.items() if client_idx == i] - # Remove the mapping - self.client_frame_mapping.pop(frame_id, None) - logger.info(f"Collected processed frame from client {i}, frame_id: {frame_id}") + if frame_ids: + # Use the oldest frame ID for this client + frame_id = min(frame_ids) + else: + # If no mapping found, log warning and continue + logger.warning(f"No frame_id mapping found for tensor from client {i}") + continue + + # Store frame with timestamp for ordering + timestamp = time.time() + await self._add_frame_to_ordered_buffer(frame_id, timestamp, out_tensor) + + # Remove the mapping + self.client_frame_mapping.pop(frame_id, None) + # logger.info(f"Collected processed frame from client {i}, frame_id: {frame_id}") except asyncio.TimeoutError: # No frame ready yet, continue pass @@ -127,7 +137,7 @@ async def _release_ordered_frames(self): # Put it in the output queue await self.processed_video_frames.put((self.next_expected_frame_id, tensor)) - logger.info(f"Released frame {self.next_expected_frame_id} to output queue") + # logger.info(f"Released frame {self.next_expected_frame_id} to output queue") # Update the next expected frame ID to the next sequential ID if possible # (or the lowest frame ID in our buffer) @@ -163,7 +173,7 @@ async def _check_frame_timeouts(self): # If we've waited too long, skip the missing frame(s) if wait_time_ms > self.max_frame_wait_ms: - logger.warning(f"Missing frame {self.next_expected_frame_id}, skipping to {oldest_frame_id}") + # logger.warning(f"Missing frame {self.next_expected_frame_id}, skipping to {oldest_frame_id}") self.next_expected_frame_id = oldest_frame_id await self._release_ordered_frames() @@ -272,7 +282,7 @@ async def put_video_frame(self, frame: av.VideoFrame): # Also add to the incoming queue for reference await self.video_incoming_frames.put((frame_id, frame)) - logger.info(f"Sent frame {frame_id} to client {client_index}") + # logger.info(f"Sent frame {frame_id} to client {client_index}") async def put_audio_frame(self, frame: av.AudioFrame): # For now, only use the first client for audio @@ -322,13 +332,14 @@ async def get_processed_video_frame(self): # Get newer frame and mark old one as skipped 
frame.side_data.skipped = True frame_id, frame = await self.video_incoming_frames.get() - logger.info(f"Skipped older frame {frame_id} to catch up") + # logger.info(f"Skipped older frame {frame_id} to catch up") # Get the processed frame from our output queue processed_frame_id, out_tensor = await self.processed_video_frames.get() if processed_frame_id != frame_id: - logger.warning(f"Frame ID mismatch: expected {frame_id}, got {processed_frame_id}") + # logger.warning(f"Frame ID mismatch: expected {frame_id}, got {processed_frame_id}") + pass # Process the frame processed_frame = self.video_postprocess(out_tensor) diff --git a/src/comfystream/client_api.py b/src/comfystream/client_api.py index 40801205..f8725aed 100644 --- a/src/comfystream/client_api.py +++ b/src/comfystream/client_api.py @@ -45,6 +45,10 @@ def __init__(self, host: str = "127.0.0.1", port: int = 8198, **kwargs): self.execution_started = False self._prompt_id = None + # Add frame tracking + self._current_frame_id = None # Track the current frame being processed + self._frame_id_mapping = {} # Map prompt_ids to frame_ids + # Configure logging if 'log_level' in kwargs: logger.setLevel(kwargs['log_level']) @@ -358,8 +362,23 @@ async def _handle_binary_message(self, binary_data): with torch.no_grad(): tensor = torch.from_numpy(np.array(img)).float().permute(2, 0, 1).unsqueeze(0) / 255.0 - # Add to output queue without waiting - tensor_cache.image_outputs.put_nowait(tensor) + # Try to get frame_id from mapping using current prompt_id + frame_id = None + if hasattr(self, '_prompt_id') and self._prompt_id in self._frame_id_mapping: + frame_id = self._frame_id_mapping.get(self._prompt_id) + # logger.info(f"Using frame_id {frame_id} from prompt_id {self._prompt_id}") + elif hasattr(self, '_current_frame_id') and self._current_frame_id is not None: + frame_id = self._current_frame_id + # logger.info(f"Using current frame_id {frame_id}") + + # Add to output queue - include frame_id if available + if frame_id is not None: + tensor_cache.image_outputs.put_nowait((frame_id, tensor)) + # logger.info(f"Added tensor with frame_id {frame_id} to output queue") + else: + tensor_cache.image_outputs.put_nowait(tensor) + #logger.info("Added tensor without frame_id to output queue") + self.execution_complete_event.set() except Exception as img_error: @@ -377,16 +396,26 @@ async def _execute_prompt(self, prompt_index: int): # Check if we have a frame waiting to be processed if not tensor_cache.image_inputs.empty(): - # logger.info("Found tensor in input queue, preparing for API") # Get the most recent frame only frame_or_tensor = None while not tensor_cache.image_inputs.empty(): frame_or_tensor = tensor_cache.image_inputs.get_nowait() + # Extract frame ID if available in side_data + frame_id = None + if hasattr(frame_or_tensor, 'side_data'): + # Try to get frame_id from side_data + if hasattr(frame_or_tensor.side_data, 'frame_id'): + frame_id = frame_or_tensor.side_data.frame_id + logger.info(f"Found frame_id in side_data: {frame_id}") + + # Store current frame ID for binary message handler to use + self._current_frame_id = frame_id + # Find ETN_LoadImageBase64 nodes first load_image_nodes = [] for node_id, node in prompt.items(): - if isinstance(node, dict) and node.get("class_type") in ["ETN_LoadImageBase64", "LoadImageBase64"]: + if isinstance(node, dict) and node.get("class_type") in ["LoadImageBase64"]: load_image_nodes.append(node_id) if not load_image_nodes: @@ -415,34 +444,64 @@ async def _execute_prompt(self, prompt_index: int): 
self.execution_complete_event.set() return - # Process tensor format only once + # Process tensor format only once - streamlined for speed and reliability with torch.no_grad(): - tensor = tensor.detach().cpu().float() - - # Handle different formats - if len(tensor.shape) == 4: # BCHW format (batch) - tensor = tensor[0] # Take first image from batch - - # Ensure it's in CHW format - if len(tensor.shape) == 3 and tensor.shape[2] == 3: # HWC format - tensor = tensor.permute(2, 0, 1) # Convert to CHW - - # Convert to PIL image for base64 ONLY ONCE - tensor_np = (tensor.permute(1, 2, 0) * 255).clamp(0, 255).numpy().astype(np.uint8) - img = Image.fromarray(tensor_np) + # Fast tensor normalization to ensure consistent output + try: + # TODO: Why is the UI sending different sizes? Should be fixed no? This breaks tensorrt + # I'm sometimes seeing (BCHW): torch.Size([1, 384, 384, 3]), H=384, W=3 + # Ensure minimum size of 512x512 + + # Handle batch dimension if present + if len(tensor.shape) == 4: # BCHW format + tensor = tensor[0] # Take first image from batch + + # Normalize to CHW format consistently + if len(tensor.shape) == 3 and tensor.shape[2] == 3: # HWC format + tensor = tensor.permute(2, 0, 1) # Convert to CHW + + # Handle single-channel case + if len(tensor.shape) == 3 and tensor.shape[0] == 1: + tensor = tensor.repeat(3, 1, 1) # Convert grayscale to RGB + + # Ensure tensor is on CPU + if tensor.is_cuda: + tensor = tensor.cpu() + + # Always resize to 512x512 for consistency (faster than checking dimensions first) + tensor = tensor.unsqueeze(0) # Add batch dim for interpolate + tensor = torch.nn.functional.interpolate( + tensor, size=(512, 512), mode='bilinear', align_corners=False + ) + tensor = tensor[0] # Remove batch dimension + + # Direct conversion to PIL without intermediate numpy step for speed + tensor_np = (tensor.permute(1, 2, 0).clamp(0, 1) * 255).to(torch.uint8).numpy() + img = Image.fromarray(tensor_np) + + # Fast JPEG encoding with balanced quality + buffer = BytesIO() + img.save(buffer, format="JPEG", quality=90, optimize=True) + buffer.seek(0) + img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8') + + except Exception as e: + logger.warning(f"Error in tensor processing: {e}, creating fallback image") + # Create a standard 512x512 placeholder if anything fails + img = Image.new('RGB', (512, 512), color=(100, 149, 237)) + buffer = BytesIO() + img.save(buffer, format="JPEG", quality=90) + buffer.seek(0) + img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8') - # Convert to base64 ONCE for all nodes - buffer = BytesIO() - img.save(buffer, format="PNG") - buffer.seek(0) - img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8') + # Add timestamp for cache busting (once, outside the try/except) + timestamp = int(time.time() * 1000) # Update all nodes with the SAME base64 string - timestamp = int(time.time() * 1000) for node_id in load_image_nodes: prompt[node_id]["inputs"]["image"] = img_base64 prompt[node_id]["inputs"]["_timestamp"] = timestamp - # Use timestamp as cache buster instead of random number + # Use timestamp as cache buster prompt[node_id]["inputs"]["_cache_buster"] = str(timestamp) except Exception as e: @@ -462,6 +521,12 @@ async def _execute_prompt(self, prompt_index: int): if response.status == 200: result = await response.json() self._prompt_id = result.get("prompt_id") + + # Map prompt_id to frame_id for later retrieval + if frame_id is not None: + self._frame_id_mapping[self._prompt_id] = frame_id + # logger.info(f"Mapped 
prompt_id {self._prompt_id} to frame_id {frame_id}") + self.execution_started = True else: error_text = await response.text() @@ -493,7 +558,8 @@ async def _send_tensor_via_websocket(self, tensor): # Prepare binary data if len(tensor.shape) == 4: # BCHW format (batch of images) if tensor.shape[0] > 1: - logger.info(f"Taking first image from batch of {tensor.shape[0]}") + # logger.info(f"Taking first image from batch of {tensor.shape[0]}") + pass tensor = tensor[0] # Take first image if batch # Ensure CHW format (3 channels) @@ -510,7 +576,7 @@ async def _send_tensor_via_websocket(self, tensor): tensor = torch.zeros(3, 512, 512) # Check tensor dimensions and log detailed info - logger.info(f"Original tensor for WS: shape={tensor.shape}, min={tensor.min().item():.4f}, max={tensor.max().item():.4f}") + # logger.info(f"Original tensor for WS: shape={tensor.shape}, min={tensor.min().item():.4f}, max={tensor.max().item():.4f}") # Always ensure consistent 512x512 dimensions ''' @@ -557,7 +623,7 @@ async def _send_tensor_via_websocket(self, tensor): # Send binary data via websocket await self.ws.send(full_data) - logger.info(f"Sent tensor as PNG image via websocket with proper header, size: {len(full_data)} bytes, image dimensions: {img.size}") + # logger.info(f"Sent tensor as PNG image via websocket with proper header, size: {len(full_data)} bytes, image dimensions: {img.size}") except Exception as e: logger.error(f"Error sending tensor via websocket: {e}") @@ -647,10 +713,18 @@ def put_audio_input(self, frame): async def get_video_output(self): """Get processed video frame from tensor cache""" - # logger.info("Waiting for processed tensor from output queue") result = await tensor_cache.image_outputs.get() - # logger.info(f"Got processed tensor from output queue: shape={result.shape if hasattr(result, 'shape') else 'unknown'}") - return result + + # Check if the result is a tuple with frame_id + if isinstance(result, tuple) and len(result) == 2: + frame_id, tensor = result + # logger.info(f"Got processed tensor from output queue with frame_id {frame_id}") + # Return both the frame_id and tensor to help with ordering in the pipeline + return frame_id, tensor + else: + # If it's not a tuple with frame_id, just return the tensor + # logger.info("Got processed tensor from output queue without frame_id") + return result async def get_audio_output(self): """Get processed audio frame from tensor cache""" From 80636b648443ae3e4d210bd85c7e49cdf6759bfb Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 1 Apr 2025 12:15:56 -0400 Subject: [PATCH 08/14] Removed requirement for workspace in app startup. 
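
With the --workspace flag gone, the API server no longer needs a local ComfyUI checkout; it
only needs the address of one or more running ComfyUI instances from the TOML config. A
typical invocation would then look roughly like the following (flag spellings assumed from
the argparse setup elsewhere in this series):

    python server/app_api.py --config-file configs/comfy.toml --max-frame-wait 500
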
--- server/app_api.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/server/app_api.py b/server/app_api.py index b7b56e56..b5676aae 100644 --- a/server/app_api.py +++ b/server/app_api.py @@ -11,7 +11,6 @@ if torch.cuda.is_available(): torch.cuda.init() - from aiohttp import web from aiortc import ( MediaStreamTrack, @@ -317,6 +316,7 @@ async def on_connectionstatechange(): ), ) + async def cancel_collect_frames(track): track.running = False if hasattr(track, 'collect_task') is not None and not track.collect_task.done(): @@ -326,6 +326,7 @@ async def cancel_collect_frames(track): except (asyncio.CancelledError): pass + async def set_prompt(request): pipeline = request.app["pipeline"] @@ -346,7 +347,6 @@ async def on_startup(app: web.Application): app["pipeline"] = Pipeline( config_path=app["config_file"], max_frame_wait_ms=app["max_frame_wait"], - cwd=app["workspace"], disable_cuda_malloc=True, gpu_only=True, preview_method='none' @@ -371,9 +371,6 @@ async def on_shutdown(app: web.Application): "--media-ports", default=None, help="Set the UDP ports for WebRTC media" ) parser.add_argument("--host", default="127.0.0.1", help="Set the host") - parser.add_argument( - "--workspace", default=None, required=True, help="Set Comfy workspace" - ) parser.add_argument( "--log-level", default="INFO", @@ -414,7 +411,6 @@ async def on_shutdown(app: web.Application): app = web.Application() app["media_ports"] = args.media_ports.split(",") if args.media_ports else None - app["workspace"] = args.workspace app["config_file"] = args.config_file app["max_frame_wait"] = args.max_frame_wait From 2cdb6faaa8dfe7e693f30edb6d23f92511df7b56 Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 1 Apr 2025 15:07:20 -0400 Subject: [PATCH 09/14] Cleanup of logging, added log_level argument, testing of send tensor websocket node. 
--- nodes/native_utils/__init__.py | 7 +- nodes/native_utils/send_tensor_websocket.py | 289 ++++++++++++++++++++ server/app_api.py | 8 +- server/pipeline_api.py | 16 +- src/comfystream/client_api.py | 90 ++---- src/comfystream/utils_api.py | 97 ++----- 6 files changed, 354 insertions(+), 153 deletions(-) create mode 100644 nodes/native_utils/send_tensor_websocket.py diff --git a/nodes/native_utils/__init__.py b/nodes/native_utils/__init__.py index e7e7789c..472e5411 100644 --- a/nodes/native_utils/__init__.py +++ b/nodes/native_utils/__init__.py @@ -1,16 +1,19 @@ from .load_image_base64 import LoadImageBase64 from .send_image_websocket import SendImageWebsocket +from .send_tensor_websocket import SendTensorWebSocket # This dictionary is used by ComfyUI to register the nodes NODE_CLASS_MAPPINGS = { "LoadImageBase64": LoadImageBase64, - "SendImageWebsocket": SendImageWebsocket + "SendImageWebsocket": SendImageWebsocket, + "SendTensorWebSocket": SendTensorWebSocket } # This dictionary provides display names for the nodes in the UI NODE_DISPLAY_NAME_MAPPINGS = { "LoadImageBase64": "Load Image Base64 (ComfyStream)", - "SendImageWebsocket": "Send Image Websocket (ComfyStream)" + "SendImageWebsocket": "Send Image Websocket (ComfyStream)", + "SendTensorWebSocket": "Save Tensor WebSocket (ComfyStream)" } # Export these variables for ComfyUI to use diff --git a/nodes/native_utils/send_tensor_websocket.py b/nodes/native_utils/send_tensor_websocket.py new file mode 100644 index 00000000..33104896 --- /dev/null +++ b/nodes/native_utils/send_tensor_websocket.py @@ -0,0 +1,289 @@ +import torch +import numpy as np +import base64 +import logging +import json +import traceback +import sys + +logger = logging.getLogger(__name__) + +# Log when the module is loaded +logger.debug("------------------ SendTensorWebSocket Module Loaded ------------------") + +class SendTensorWebSocket: + def __init__(self): + # Output directory is not needed as we send via WebSocket + logger.debug("SendTensorWebSocket instance created") + pass + + @classmethod + def INPUT_TYPES(cls): + logger.debug("SendTensorWebSocket.INPUT_TYPES called") + return { + "required": { + # Accept IMAGE input (typical output from VAE Decode) + "tensor": ("IMAGE", ), + }, + "hidden": { + # These are needed for ComfyUI execution context + "prompt": "PROMPT", + "extra_pnginfo": "EXTRA_PNGINFO" + }, + } + + RETURN_TYPES = () # No direct output connection to other nodes + FUNCTION = "save_tensor" + OUTPUT_NODE = True + CATEGORY = "ComfyStream/native" + + def save_tensor(self, tensor, prompt=None, extra_pnginfo=None): + logger.debug("========== SendTensorWebSocket.save_tensor STARTED ==========") + logger.info(f"SendTensorWebSocket received input. 
Type: {type(tensor)}") + logger.debug(f"SendTensorWebSocket node is processing tensor with id: {id(tensor)}") + + # Log memory usage for debugging + if torch.cuda.is_available(): + try: + logger.debug(f"CUDA memory allocated: {torch.cuda.memory_allocated() / 1024**2:.2f} MB") + logger.debug(f"CUDA memory reserved: {torch.cuda.memory_reserved() / 1024**2:.2f} MB") + except Exception as e: + logger.error(f"Error checking CUDA memory: {e}") + + if tensor is None: + logger.error("SendTensorWebSocket received None tensor.") + # Return error directly without ui nesting + return {"comfystream_tensor_output": {"error": "Input tensor was None"}} + + try: + # Log details about the tensor before processing + logger.debug(f"Process tensor of type: {type(tensor)}") + + if isinstance(tensor, torch.Tensor): + logger.debug("Processing torch.Tensor...") + logger.info(f"Input tensor details: shape={tensor.shape}, dtype={tensor.dtype}, device={tensor.device}") + + # Additional handling for IMAGE-type tensors (0-1 float values, BCHW format) + if len(tensor.shape) == 4: # BCHW format (batch) + logger.debug(f"Tensor is batched (BCHW): {tensor.shape}") + logger.info(f"Tensor appears to be IMAGE batch. Min: {tensor.min().item()}, Max: {tensor.max().item()}") + logger.debug(f"First batch slice: min={tensor[0].min().item()}, max={tensor[0].max().item()}") + tensor = tensor[0] # Select first image from batch + logger.debug(f"Selected first batch element. New shape: {tensor.shape}") + + if len(tensor.shape) == 3: # CHW format (single image) + logger.debug(f"Tensor is CHW format: {tensor.shape}") + logger.info(f"Tensor appears to be single IMAGE. Min: {tensor.min().item()}, Max: {tensor.max().item()}") + + # Log first few values for debugging + logger.debug(f"First few values: {tensor.flatten()[:10].tolist()}") + + # Ensure the tensor is on CPU and detached + logger.debug(f"Moving tensor to CPU. 
Current device: {tensor.device}") + try: + tensor = tensor.cpu().detach() + logger.debug(f"Tensor moved to CPU successfully: {tensor.device}") + except Exception as e: + logger.error(f"Error moving tensor to CPU: {e}") + logger.error(traceback.format_exc()) + return {"comfystream_tensor_output": {"error": f"CPU transfer error: {str(e)}"}} + + # Convert to numpy + logger.debug("Converting tensor to numpy array...") + try: + np_array = tensor.numpy() + logger.debug(f"Conversion to numpy successful: shape={np_array.shape}, dtype={np_array.dtype}") + logger.debug(f"NumPy array memory usage: {np_array.nbytes / 1024**2:.2f} MB") + except Exception as e: + logger.error(f"Error converting tensor to numpy: {e}") + logger.error(traceback.format_exc()) + return {"comfystream_tensor_output": {"error": f"NumPy conversion error: {str(e)}"}} + + # Encode the tensor + logger.debug("Converting numpy array to bytes...") + try: + tensor_bytes = np_array.tobytes() + logger.debug(f"Tensor converted to bytes: {len(tensor_bytes)} bytes") + except Exception as e: + logger.error(f"Error converting numpy array to bytes: {e}") + logger.error(traceback.format_exc()) + return {"comfystream_tensor_output": {"error": f"Bytes conversion error: {str(e)}"}} + + logger.debug("Encoding bytes to base64...") + try: + b64_data = base64.b64encode(tensor_bytes).decode('utf-8') + b64_size = len(b64_data) + logger.debug(f"Base64 encoding successful: {b64_size} characters") + if b64_size > 100: + logger.debug(f"Base64 sample: {b64_data[:50]}...{b64_data[-50:]}") + except Exception as e: + logger.error(f"Error encoding to base64: {e}") + logger.error(traceback.format_exc()) + return {"comfystream_tensor_output": {"error": f"Base64 encoding error: {str(e)}"}} + + # Prepare metadata + shape = list(np_array.shape) + dtype = str(np_array.dtype) + + logger.info(f"SendTensorWebSocket prepared tensor: shape={shape}, dtype={dtype}") + + # Construct the return value with simplified structure (no ui nesting) + success_output = { + "comfystream_tensor_output": { + "b64_data": b64_data, + "shape": shape, + "dtype": dtype + } + } + + # Log the structure of the output (avoid logging the actual base64 data which is large) + output_structure = { + "comfystream_tensor_output": { + "b64_data": f"(base64 string of {b64_size} bytes)", + "shape": shape, + "dtype": dtype + } + } + logger.info(f"SendTensorWebSocket returning SUCCESS data structure: {json.dumps(output_structure)}") + logger.debug("========== SendTensorWebSocket.save_tensor COMPLETED SUCCESSFULLY ==========") + + return success_output + + elif isinstance(tensor, np.ndarray): + logger.debug("Processing numpy.ndarray...") + logger.info(f"Input is numpy array: shape={tensor.shape}, dtype={tensor.dtype}") + + # Log memory details + logger.debug(f"NumPy array memory usage: {tensor.nbytes / 1024**2:.2f} MB") + logger.debug(f"First few values: {tensor.flatten()[:10].tolist()}") + + # Handle numpy array directly + logger.debug("Converting numpy array to bytes...") + try: + tensor_bytes = tensor.tobytes() + logger.debug(f"NumPy array converted to bytes: {len(tensor_bytes)} bytes") + except Exception as e: + logger.error(f"Error converting numpy array to bytes: {e}") + logger.error(traceback.format_exc()) + return {"comfystream_tensor_output": {"error": f"NumPy to bytes error: {str(e)}"}} + + logger.debug("Encoding bytes to base64...") + try: + b64_data = base64.b64encode(tensor_bytes).decode('utf-8') + b64_size = len(b64_data) + logger.debug(f"Base64 encoding successful: {b64_size} characters") + if 
b64_size > 100: + logger.debug(f"Base64 sample: {b64_data[:50]}...{b64_data[-50:]}") + except Exception as e: + logger.error(f"Error encoding numpy to base64: {e}") + logger.error(traceback.format_exc()) + return {"comfystream_tensor_output": {"error": f"NumPy base64 encoding error: {str(e)}"}} + + shape = list(tensor.shape) + dtype = str(tensor.dtype) + + logger.debug("Constructing success output for numpy array...") + success_output = { + "comfystream_tensor_output": { + "b64_data": b64_data, + "shape": shape, + "dtype": dtype + } + } + logger.info(f"SendTensorWebSocket returning SUCCESS from numpy array: shape={shape}, dtype={dtype}") + logger.debug("========== SendTensorWebSocket.save_tensor COMPLETED SUCCESSFULLY ==========") + return success_output + + elif isinstance(tensor, list): + logger.debug("Processing list input...") + logger.info(f"Input is a list of length {len(tensor)}") + + if len(tensor) > 0: + first_item = tensor[0] + logger.debug(f"First item type: {type(first_item)}") + + if isinstance(first_item, torch.Tensor): + logger.debug("Processing first tensor from list...") + logger.debug(f"First tensor details: shape={first_item.shape}, dtype={first_item.dtype}, device={first_item.device}") + + # Log first few values + logger.debug(f"First few values: {first_item.flatten()[:10].tolist()}") + + # Process first tensor in the list + try: + logger.debug("Moving tensor to CPU and detaching...") + np_array = first_item.cpu().detach().numpy() + logger.debug(f"Conversion successful: shape={np_array.shape}, dtype={np_array.dtype}") + except Exception as e: + logger.error(f"Error processing first tensor in list: {e}") + logger.error(traceback.format_exc()) + return {"comfystream_tensor_output": {"error": f"List tensor processing error: {str(e)}"}} + + try: + logger.debug("Converting numpy array to bytes...") + tensor_bytes = np_array.tobytes() + logger.debug(f"Converted to bytes: {len(tensor_bytes)} bytes") + except Exception as e: + logger.error(f"Error converting list tensor to bytes: {e}") + logger.error(traceback.format_exc()) + return {"comfystream_tensor_output": {"error": f"List tensor bytes conversion error: {str(e)}"}} + + try: + logger.debug("Encoding bytes to base64...") + b64_data = base64.b64encode(tensor_bytes).decode('utf-8') + b64_size = len(b64_data) + logger.debug(f"Base64 encoding successful: {b64_size} characters") + except Exception as e: + logger.error(f"Error encoding list tensor to base64: {e}") + logger.error(traceback.format_exc()) + return {"comfystream_tensor_output": {"error": f"List tensor base64 encoding error: {str(e)}"}} + + shape = list(np_array.shape) + dtype = str(np_array.dtype) + + logger.debug("Constructing success output for list tensor...") + success_output = { + "comfystream_tensor_output": { + "b64_data": b64_data, + "shape": shape, + "dtype": dtype + } + } + logger.info(f"SendTensorWebSocket returning SUCCESS from list's first tensor: shape={shape}, dtype={dtype}") + logger.debug("========== SendTensorWebSocket.save_tensor COMPLETED SUCCESSFULLY ==========") + return success_output + else: + logger.error(f"First item in list is not a tensor but {type(first_item)}") + if hasattr(first_item, '__dict__'): + logger.debug(f"First item attributes: {dir(first_item)}") + + # If we got here, couldn't process the list + logger.error(f"Unable to process list input: invalid content types") + list_types = [type(x).__name__ for x in tensor[:3]] + error_msg = f"Unsupported list content: {list_types}..." 
+ logger.debug("========== SendTensorWebSocket.save_tensor FAILED ==========") + return {"comfystream_tensor_output": {"error": error_msg}} + + else: + # Unsupported type + error_msg = f"Unsupported tensor type: {type(tensor)}" + logger.error(error_msg) + if hasattr(tensor, '__dict__'): + logger.debug(f"Tensor attributes: {dir(tensor)}") + + logger.debug("========== SendTensorWebSocket.save_tensor FAILED ==========") + return {"comfystream_tensor_output": {"error": error_msg}} + + except Exception as e: + logger.exception(f"Error serializing tensor in SendTensorWebSocket: {e}") + + # Get detailed exception info + exc_type, exc_value, exc_traceback = sys.exc_info() + tb_lines = traceback.format_exception(exc_type, exc_value, exc_traceback) + tb_text = ''.join(tb_lines) + logger.debug(f"Exception traceback:\n{tb_text}") + + error_output = {"comfystream_tensor_output": {"error": f"{str(e)} - See save_tensor_websocket_debug.log for details"}} + logger.info(f"SendTensorWebSocket returning ERROR data: {error_output}") + logger.debug("========== SendTensorWebSocket.save_tensor FAILED WITH EXCEPTION ==========") + return error_output \ No newline at end of file diff --git a/server/app_api.py b/server/app_api.py index b5676aae..03fa4eda 100644 --- a/server/app_api.py +++ b/server/app_api.py @@ -372,8 +372,9 @@ async def on_shutdown(app: web.Application): ) parser.add_argument("--host", default="127.0.0.1", help="Set the host") parser.add_argument( - "--log-level", - default="INFO", + "--log-level", "--log_level", + dest="log_level", + default="WARNING", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], help="Set the logging level", ) @@ -409,6 +410,9 @@ async def on_shutdown(app: web.Application): datefmt="%H:%M:%S", ) + # Set logger level based on command line arguments + logger.setLevel(getattr(logging, args.log_level.upper())) + app = web.Application() app["media_ports"] = args.media_ports.split(",") if args.media_ports else None app["config_file"] = args.config_file diff --git a/server/pipeline_api.py b/server/pipeline_api.py index 1ff1826a..23381e81 100644 --- a/server/pipeline_api.py +++ b/server/pipeline_api.py @@ -5,7 +5,7 @@ import logging import time import random -from collections import deque, OrderedDict +from collections import OrderedDict from typing import Any, Dict, Union, List, Optional, Deque from comfystream.client_api import ComfyStreamClient @@ -75,7 +75,7 @@ async def _collect_processed_frames(self): # Check if result is already a tuple with frame_id if isinstance(result, tuple) and len(result) == 2: frame_id, out_tensor = result - # logger.info(f"Got result with embedded frame_id: {frame_id}") + logger.info(f"Got result with embedded frame_id: {frame_id}") else: out_tensor = result # Find which original frame this corresponds to using our mapping @@ -96,7 +96,7 @@ async def _collect_processed_frames(self): # Remove the mapping self.client_frame_mapping.pop(frame_id, None) - # logger.info(f"Collected processed frame from client {i}, frame_id: {frame_id}") + logger.info(f"Collected processed frame from client {i}, frame_id: {frame_id}") except asyncio.TimeoutError: # No frame ready yet, continue pass @@ -137,7 +137,7 @@ async def _release_ordered_frames(self): # Put it in the output queue await self.processed_video_frames.put((self.next_expected_frame_id, tensor)) - # logger.info(f"Released frame {self.next_expected_frame_id} to output queue") + logger.info(f"Released frame {self.next_expected_frame_id} to output queue") # Update the next expected frame ID to the next 
sequential ID if possible # (or the lowest frame ID in our buffer) @@ -173,7 +173,7 @@ async def _check_frame_timeouts(self): # If we've waited too long, skip the missing frame(s) if wait_time_ms > self.max_frame_wait_ms: - # logger.warning(f"Missing frame {self.next_expected_frame_id}, skipping to {oldest_frame_id}") + logger.warning(f"Missing frame {self.next_expected_frame_id}, skipping to {oldest_frame_id}") self.next_expected_frame_id = oldest_frame_id await self._release_ordered_frames() @@ -282,7 +282,7 @@ async def put_video_frame(self, frame: av.VideoFrame): # Also add to the incoming queue for reference await self.video_incoming_frames.put((frame_id, frame)) - # logger.info(f"Sent frame {frame_id} to client {client_index}") + logger.debug(f"Sent frame {frame_id} to client {client_index}") async def put_audio_frame(self, frame: av.AudioFrame): # For now, only use the first client for audio @@ -332,13 +332,13 @@ async def get_processed_video_frame(self): # Get newer frame and mark old one as skipped frame.side_data.skipped = True frame_id, frame = await self.video_incoming_frames.get() - # logger.info(f"Skipped older frame {frame_id} to catch up") + logger.info(f"Skipped older frame {frame_id} to catch up") # Get the processed frame from our output queue processed_frame_id, out_tensor = await self.processed_video_frames.get() if processed_frame_id != frame_id: - # logger.warning(f"Frame ID mismatch: expected {frame_id}, got {processed_frame_id}") + logger.warning(f"Frame ID mismatch: expected {frame_id}, got {processed_frame_id}") pass # Process the frame diff --git a/src/comfystream/client_api.py b/src/comfystream/client_api.py index f8725aed..2fd14bb1 100644 --- a/src/comfystream/client_api.py +++ b/src/comfystream/client_api.py @@ -106,9 +106,9 @@ async def run_prompt(self, prompt_index: int): # Wait for execution completion with timeout try: - # logger.info("Waiting for execution to complete (max 10 seconds)...") + logger.info("Waiting for execution to complete (max 10 seconds)...") await asyncio.wait_for(self.execution_complete_event.wait(), timeout=10.0) - # logger.info("Execution complete, ready for next frame") + logger.info("Execution complete, ready for next frame") except asyncio.TimeoutError: logger.error("Timeout waiting for execution, forcing continuation") self.execution_complete_event.set() @@ -139,9 +139,6 @@ async def _connect_websocket(self): logger.info(f"Connecting to WebSocket at {self.server_address}?clientId={self.client_id}") - # Set a reasonable timeout for connection - websocket_timeout = 10.0 # seconds - try: # Connect with proper error handling self.ws = await websockets.connect( @@ -222,12 +219,12 @@ async def _handle_text_message(self, message: str): data = json.loads(message) message_type = data.get("type", "unknown") - # logger.info(f"Received message type: {message_type}") + logger.debug(f"Received message type: {message_type}") + logger.debug(f"{data}") + ''' # Handle different message types if message_type == "status": - pass - ''' # Status message with comfy_ui's queue information queue_remaining = data.get("data", {}).get("queue_remaining", 0) exec_info = data.get("data", {}).get("exec_info", {}) @@ -235,20 +232,19 @@ async def _handle_text_message(self, message: str): logger.info("Queue empty, no active execution") else: logger.info(f"Queue status: {queue_remaining} items remaining") - ''' elif message_type == "progress": if "data" in data and "value" in data["data"]: progress = data["data"]["value"] max_value = data["data"].get("max", 100) # 
Log the progress for debugging - # logger.info(f"Progress: {progress}/{max_value}") + logger.info(f"Progress: {progress}/{max_value}") elif message_type == "execution_start": self.execution_started = True if "data" in data and "prompt_id" in data["data"]: self._prompt_id = data["data"]["prompt_id"] - # logger.info(f"Execution started for prompt {self._prompt_id}") + logger.info(f"Execution started for prompt {self._prompt_id}") elif message_type == "executing": self.execution_started = True @@ -257,18 +253,19 @@ async def _handle_text_message(self, message: str): self._prompt_id = data["data"]["prompt_id"] if "node" in data["data"]: node_id = data["data"]["node"] - # logger.info(f"Executing node: {node_id}") + logger.info(f"Executing node: {node_id}") elif message_type in ["execution_cached", "execution_error", "execution_complete", "execution_interrupted"]: - # logger.info(f"{message_type} message received for prompt {self._prompt_id}") + logger.info(f"{message_type} message received for prompt {self._prompt_id}") # self.execution_started = False # Always signal completion for these terminal states # self.execution_complete_event.set() - # logger.info(f"Set execution_complete_event from {message_type}") + logger.info(f"Set execution_complete_event from {message_type}") pass + ''' - elif message_type == "executed": + if message_type == "executed": # This is sent when a node is completely done if "data" in data and "node_id" in data["data"]: node_id = data["data"]["node_id"] @@ -283,50 +280,9 @@ async def _handle_text_message(self, message: str): elif self.execution_started and not self.execution_complete_event.is_set(): # Check if this was the last node if data.get("data", {}).get("remaining", 0) == 0: - # logger.info("All nodes executed but no tensor data received, forcing completion") # self.execution_complete_event.set() pass - - elif message_type == "executed_node" and "output" in data.get("data", {}): - node_id = data.get("data", {}).get("node_id") - output_data = data.get("data", {}).get("output", {}) - prompt_id = data.get("data", {}).get("prompt_id", "unknown") - - logger.info(f"Node {node_id} executed in prompt {prompt_id}") - - ''' - # Check if this is from ETN_SendImageWebSocket node - if "ui" in output_data and "images" in output_data["ui"]: - images_info = output_data["ui"]["images"] - logger.info(f"Found image output from ETN_SendImageWebSocket in node {node_id}") - - # Images will be received via binary websocket messages after this event - # The binary handler will take care of them - pass - - # Keep existing handling for tensor data - elif "ui" in output_data and "tensor" in output_data["ui"]: - tensor_info = output_data["ui"]["tensor"] - tensor_id = tensor_info.get("tensor_id", "unknown") - logger.info(f"Found tensor data with ID: {tensor_id} in node {node_id}") - - # Decode the tensor data - tensor_data = await self._decode_tensor_data(tensor_info) - if tensor_data is not None: - # Add to output queue without waiting to unblock event loop - tensor_cache.image_outputs.put_nowait(tensor_data) - logger.info(f"Added tensor to output queue, shape: {tensor_data.shape}") - - # IMPORTANT: Immediately signal that we can proceed with the next frame - # when we receive tensor data, don't wait - logger.info("Received tensor data, immediately signaling execution complete") - self.execution_complete_event.set() - logger.info("Set execution_complete_event after processing tensor data") - else: - logger.error("Failed to decode tensor data") - # Signal completion even if decoding failed 
to prevent hanging - self.execution_complete_event.set() - ''' + except json.JSONDecodeError: logger.error(f"Invalid JSON message: {message[:100]}...") except Exception as e: @@ -366,18 +322,18 @@ async def _handle_binary_message(self, binary_data): frame_id = None if hasattr(self, '_prompt_id') and self._prompt_id in self._frame_id_mapping: frame_id = self._frame_id_mapping.get(self._prompt_id) - # logger.info(f"Using frame_id {frame_id} from prompt_id {self._prompt_id}") + logger.info(f"Using frame_id {frame_id} from prompt_id {self._prompt_id}") elif hasattr(self, '_current_frame_id') and self._current_frame_id is not None: frame_id = self._current_frame_id - # logger.info(f"Using current frame_id {frame_id}") + logger.info(f"Using current frame_id {frame_id}") # Add to output queue - include frame_id if available if frame_id is not None: tensor_cache.image_outputs.put_nowait((frame_id, tensor)) - # logger.info(f"Added tensor with frame_id {frame_id} to output queue") + logger.debug(f"Added tensor with frame_id {frame_id} to output queue") else: tensor_cache.image_outputs.put_nowait(tensor) - #logger.info("Added tensor without frame_id to output queue") + logger.debug("Added tensor without frame_id to output queue") self.execution_complete_event.set() @@ -525,7 +481,7 @@ async def _execute_prompt(self, prompt_index: int): # Map prompt_id to frame_id for later retrieval if frame_id is not None: self._frame_id_mapping[self._prompt_id] = frame_id - # logger.info(f"Mapped prompt_id {self._prompt_id} to frame_id {frame_id}") + logger.info(f"Mapped prompt_id {self._prompt_id} to frame_id {frame_id}") self.execution_started = True else: @@ -558,7 +514,7 @@ async def _send_tensor_via_websocket(self, tensor): # Prepare binary data if len(tensor.shape) == 4: # BCHW format (batch of images) if tensor.shape[0] > 1: - # logger.info(f"Taking first image from batch of {tensor.shape[0]}") + logger.info(f"Taking first image from batch of {tensor.shape[0]}") pass tensor = tensor[0] # Take first image if batch @@ -576,7 +532,7 @@ async def _send_tensor_via_websocket(self, tensor): tensor = torch.zeros(3, 512, 512) # Check tensor dimensions and log detailed info - # logger.info(f"Original tensor for WS: shape={tensor.shape}, min={tensor.min().item():.4f}, max={tensor.max().item():.4f}") + logger.info(f"Original tensor for WS: shape={tensor.shape}, min={tensor.min().item():.4f}, max={tensor.max().item():.4f}") # Always ensure consistent 512x512 dimensions ''' @@ -623,7 +579,7 @@ async def _send_tensor_via_websocket(self, tensor): # Send binary data via websocket await self.ws.send(full_data) - # logger.info(f"Sent tensor as PNG image via websocket with proper header, size: {len(full_data)} bytes, image dimensions: {img.size}") + logger.info(f"Sent tensor as PNG image via websocket with proper header, size: {len(full_data)} bytes, image dimensions: {img.size}") except Exception as e: logger.error(f"Error sending tensor via websocket: {e}") @@ -718,12 +674,12 @@ async def get_video_output(self): # Check if the result is a tuple with frame_id if isinstance(result, tuple) and len(result) == 2: frame_id, tensor = result - # logger.info(f"Got processed tensor from output queue with frame_id {frame_id}") + logger.info(f"Got processed tensor from output queue with frame_id {frame_id}") # Return both the frame_id and tensor to help with ordering in the pipeline return frame_id, tensor else: # If it's not a tuple with frame_id, just return the tensor - # logger.info("Got processed tensor from output queue without 
frame_id") + logger.info("Got processed tensor from output queue without frame_id") return result async def get_audio_output(self): diff --git a/src/comfystream/utils_api.py b/src/comfystream/utils_api.py index d207f4e3..ba31ccd4 100644 --- a/src/comfystream/utils_api.py +++ b/src/comfystream/utils_api.py @@ -15,7 +15,7 @@ def create_load_tensor_node(): "_meta": {"title": "Load Tensor (API)"}, } -def create_load_image_node(): +def create_load_image_base64_node(): return { "inputs": { "image": "" # Should be "image" not "image_data" to match LoadImageBase64 @@ -44,7 +44,7 @@ def create_save_tensor_node(inputs: Dict[Any, Any]): "_meta": {"title": "Save Tensor (API)"}, } -def create_save_image_node(inputs: Dict[Any, Any]): +def create_send_image_websocket_node(inputs: Dict[Any, Any]): # Get the correct image input reference images_input = inputs.get("images", inputs.get("image")) @@ -61,6 +61,22 @@ def create_save_image_node(inputs: Dict[Any, Any]): "_meta": {"title": "Send Image Websocket (ComfyStream)"}, } +def create_send_tensor_websocket_node(inputs: Dict[Any, Any]): + # Get the correct image input reference + tensor_input = inputs.get("images", inputs.get("tensor")) + + if not tensor_input: + logging.warning("No valid tensor input found for SendTensorWebSocket node") + tensor_input = ["", 0] # Default empty value + + return { + "inputs": { + "tensor": tensor_input + }, + "class_type": "SendTensorWebSocket", + "_meta": {"title": "Save Tensor WebSocket (ComfyStream)"}, + } + def convert_prompt(prompt): logging.info("Converting prompt: %s", prompt) @@ -100,7 +116,7 @@ def convert_prompt(prompt): num_primary_inputs += 1 elif class_type in ["LoadImage", "LoadImageBase64"]: num_inputs += 1 - elif class_type in ["PreviewImage", "SaveImage", "SendImageWebsocket"]: + elif class_type in ["PreviewImage", "SaveImage", "SendImageWebsocket", "SendTensorWebSocket"]: num_outputs += 1 # Only handle single primary input @@ -123,83 +139,16 @@ def convert_prompt(prompt): # Replace nodes with proper implementations for key in keys["PrimaryInputLoadImage"]: - prompt[key] = create_load_image_node() + prompt[key] = create_load_image_base64_node() if num_primary_inputs == 0 and len(keys["LoadImage"]) == 1: - prompt[keys["LoadImage"][0]] = create_load_image_node() + prompt[keys["LoadImage"][0]] = create_load_image_base64_node() for key in keys["PreviewImage"] + keys["SaveImage"]: node = prompt[key] - prompt[key] = create_save_image_node(node["inputs"]) + # prompt[key] = create_save_image_node(node["inputs"]) + prompt[key] = create_send_image_websocket_node(node["inputs"]) # TESTING # TODO: Validate the processed prompt input return prompt - -''' -def convert_prompt(prompt: PromptDictInput) -> Prompt: - # Validate the schema - Prompt.validate(prompt) - - prompt = copy.deepcopy(prompt) - - num_primary_inputs = 0 - num_inputs = 0 - num_outputs = 0 - - keys = { - "PrimaryInputLoadImage": [], - "LoadImage": [], - "PreviewImage": [], - "SaveImage": [], - } - - for key, node in prompt.items(): - class_type = node.get("class_type") - - # Collect keys for nodes that might need to be replaced - if class_type in keys: - keys[class_type].append(key) - - # Count inputs and outputs - if class_type == "PrimaryInputLoadImage": - num_primary_inputs += 1 - elif class_type in ["LoadImage", "LoadTensor", "LoadAudioTensor"]: - num_inputs += 1 - elif class_type in ["PreviewImage", "SaveImage", "SaveTensor", "SaveAudioTensor"]: - num_outputs += 1 - - # Only handle single primary input - if num_primary_inputs > 1: - raise Exception("too 
many primary inputs in prompt") - - # If there are no primary inputs, only handle single input - if num_primary_inputs == 0 and num_inputs > 1: - raise Exception("too many inputs in prompt") - - # Only handle single output for now - if num_outputs > 1: - raise Exception("too many outputs in prompt") - - if num_primary_inputs + num_inputs == 0: - raise Exception("missing input") - - if num_outputs == 0: - raise Exception("missing output") - - # Replace nodes - for key in keys["PrimaryInputLoadImage"]: - prompt[key] = create_load_tensor_node() - - if num_primary_inputs == 0 and len(keys["LoadImage"]) == 1: - prompt[keys["LoadImage"][0]] = create_load_tensor_node() - - for key in keys["PreviewImage"] + keys["SaveImage"]: - node = prompt[key] - prompt[key] = create_save_tensor_node(node["inputs"]) - - # Validate the processed prompt input - prompt = Prompt.validate(prompt) - - return prompt -''' \ No newline at end of file From c4d2ea177d0cb6a136e390140f8da380109d190d Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 1 Apr 2025 15:10:48 -0400 Subject: [PATCH 10/14] Setting a few logs to debug. --- server/pipeline_api.py | 2 +- src/comfystream/client_api.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/server/pipeline_api.py b/server/pipeline_api.py index 23381e81..805f8734 100644 --- a/server/pipeline_api.py +++ b/server/pipeline_api.py @@ -75,7 +75,7 @@ async def _collect_processed_frames(self): # Check if result is already a tuple with frame_id if isinstance(result, tuple) and len(result) == 2: frame_id, out_tensor = result - logger.info(f"Got result with embedded frame_id: {frame_id}") + logger.debug(f"Got result with embedded frame_id: {frame_id}") else: out_tensor = result # Find which original frame this corresponds to using our mapping diff --git a/src/comfystream/client_api.py b/src/comfystream/client_api.py index 2fd14bb1..d4a2ed7c 100644 --- a/src/comfystream/client_api.py +++ b/src/comfystream/client_api.py @@ -106,9 +106,9 @@ async def run_prompt(self, prompt_index: int): # Wait for execution completion with timeout try: - logger.info("Waiting for execution to complete (max 10 seconds)...") + logger.debug("Waiting for execution to complete (max 10 seconds)...") await asyncio.wait_for(self.execution_complete_event.wait(), timeout=10.0) - logger.info("Execution complete, ready for next frame") + logger.debug("Execution complete, ready for next frame") except asyncio.TimeoutError: logger.error("Timeout waiting for execution, forcing continuation") self.execution_complete_event.set() @@ -322,10 +322,10 @@ async def _handle_binary_message(self, binary_data): frame_id = None if hasattr(self, '_prompt_id') and self._prompt_id in self._frame_id_mapping: frame_id = self._frame_id_mapping.get(self._prompt_id) - logger.info(f"Using frame_id {frame_id} from prompt_id {self._prompt_id}") + logger.debug(f"Using frame_id {frame_id} from prompt_id {self._prompt_id}") elif hasattr(self, '_current_frame_id') and self._current_frame_id is not None: frame_id = self._current_frame_id - logger.info(f"Using current frame_id {frame_id}") + logger.debug(f"Using current frame_id {frame_id}") # Add to output queue - include frame_id if available if frame_id is not None: From 0ee6222c89183f80ed6f1ee4d9ce55bbf0a26bf8 Mon Sep 17 00:00:00 2001 From: Buff Date: Tue, 1 Apr 2025 15:14:12 -0400 Subject: [PATCH 11/14] Update requirements.txt Co-authored-by: John | Elite Encoder --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt 
index 56fe8b22..24c6c32a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,6 @@ aiortc aiohttp toml tomli +websockets twilio prometheus_client From 61a03fad16fdfee881c24ef677edc69b2292a06a Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 1 Apr 2025 16:45:31 -0400 Subject: [PATCH 12/14] Added native nodes into root nodes. --- nodes/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nodes/__init__.py b/nodes/__init__.py index 46aa93c6..4682f0d6 100644 --- a/nodes/__init__.py +++ b/nodes/__init__.py @@ -3,6 +3,7 @@ from .audio_utils import * from .tensor_utils import * from .video_stream_utils import * +from .native_utils import * from .api import * from .web import * @@ -11,7 +12,7 @@ NODE_DISPLAY_NAME_MAPPINGS = {} # Import and update mappings from submodules -for module in [audio_utils, tensor_utils, video_stream_utils, api, web]: +for module in [audio_utils, tensor_utils, video_stream_utils, api, web, native_utils]: if hasattr(module, 'NODE_CLASS_MAPPINGS'): NODE_CLASS_MAPPINGS.update(module.NODE_CLASS_MAPPINGS) if hasattr(module, 'NODE_DISPLAY_NAME_MAPPINGS'): From 77e28ffd5205965e546389df4e238372a9784d87 Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 8 Apr 2025 14:54:58 -0400 Subject: [PATCH 13/14] Rebuilt get_available_nodes using native ComfyUI api for retrofit to the ui, cleanup of tensor code. --- server/pipeline_api.py | 17 +- src/comfystream/client_api.py | 326 +++++++++++++++++++--------------- src/comfystream/utils_api.py | 4 +- 3 files changed, 190 insertions(+), 157 deletions(-) diff --git a/server/pipeline_api.py b/server/pipeline_api.py index 805f8734..fdf8a14e 100644 --- a/server/pipeline_api.py +++ b/server/pipeline_api.py @@ -173,7 +173,7 @@ async def _check_frame_timeouts(self): # If we've waited too long, skip the missing frame(s) if wait_time_ms > self.max_frame_wait_ms: - logger.warning(f"Missing frame {self.next_expected_frame_id}, skipping to {oldest_frame_id}") + logger.debug(f"Missing frame {self.next_expected_frame_id}, skipping to {oldest_frame_id}") self.next_expected_frame_id = oldest_frame_id await self._release_ordered_frames() @@ -338,7 +338,7 @@ async def get_processed_video_frame(self): processed_frame_id, out_tensor = await self.processed_video_frames.get() if processed_frame_id != frame_id: - logger.warning(f"Frame ID mismatch: expected {frame_id}, got {processed_frame_id}") + logger.debug(f"Frame ID mismatch: expected {frame_id}, got {processed_frame_id}") pass # Process the frame @@ -373,13 +373,14 @@ async def get_processed_audio_frame(self): processed_frame.sample_rate = frame.sample_rate return processed_frame - + async def get_nodes_info(self) -> Dict[str, Any]: - """Get information about nodes from the first client""" - if not self.clients: - return {} - return await self.clients[0].get_available_nodes() - + """Get information about all nodes in the current prompt including metadata.""" + # Note that we pull the node info from the first client (as they should all be the same) + # TODO: This is just retrofitting the functionality of the comfy embedded client, there could be major improvements here + nodes_info = await self.clients[0].get_available_nodes() + return nodes_info + async def cleanup(self): """Clean up all clients and background tasks""" self.running = False diff --git a/src/comfystream/client_api.py b/src/comfystream/client_api.py index d4a2ed7c..f47ebb3f 100644 --- a/src/comfystream/client_api.py +++ b/src/comfystream/client_api.py @@ -496,97 +496,6 @@ async def 
_execute_prompt(self, prompt_index: int): logger.error(f"Error executing prompt: {e}") self.execution_complete_event.set() - async def _send_tensor_via_websocket(self, tensor): - """Send tensor data via the websocket connection""" - try: - if self.ws is None: - logger.error("WebSocket not connected, cannot send tensor") - self.execution_complete_event.set() # Prevent hanging - return - - # Convert the tensor to image format for sending - if isinstance(tensor, np.ndarray): - tensor = torch.from_numpy(tensor).float() - - # Ensure on CPU and correct format - tensor = tensor.detach().cpu().float() - - # Prepare binary data - if len(tensor.shape) == 4: # BCHW format (batch of images) - if tensor.shape[0] > 1: - logger.info(f"Taking first image from batch of {tensor.shape[0]}") - pass - tensor = tensor[0] # Take first image if batch - - # Ensure CHW format (3 channels) - if len(tensor.shape) == 3: - if tensor.shape[0] != 3 and tensor.shape[2] == 3: # HWC format - tensor = tensor.permute(2, 0, 1) # Convert to CHW - elif tensor.shape[0] != 3: - logger.warning(f"Tensor doesn't have 3 channels: {tensor.shape}. Creating standard tensor.") - # Create a standard RGB tensor - tensor = torch.zeros(3, 512, 512) - else: - logger.warning(f"Tensor has unexpected shape: {tensor.shape}. Creating standard tensor.") - # Create a standard RGB tensor - tensor = torch.zeros(3, 512, 512) - - # Check tensor dimensions and log detailed info - logger.info(f"Original tensor for WS: shape={tensor.shape}, min={tensor.min().item():.4f}, max={tensor.max().item():.4f}") - - # Always ensure consistent 512x512 dimensions - ''' - if tensor.shape[1] != 512 or tensor.shape[2] != 512: - logger.info(f"Resizing tensor from {tensor.shape} to standard 512x512") - import torch.nn.functional as F - tensor = tensor.unsqueeze(0) # Add batch dimension for interpolate - tensor = F.interpolate(tensor, size=(512, 512), mode='bilinear', align_corners=False) - tensor = tensor.squeeze(0) # Remove batch dimension after resize - ''' - - # Check for NaN or Inf values - if torch.isnan(tensor).any() or torch.isinf(tensor).any(): - logger.warning("Tensor contains NaN or Inf values! 
Replacing with zeros.") - tensor = torch.nan_to_num(tensor, nan=0.0, posinf=1.0, neginf=0.0) - - # Convert to image (HWC for PIL) - tensor_np = (tensor.permute(1, 2, 0) * 255).clamp(0, 255).numpy().astype(np.uint8) - img = Image.fromarray(tensor_np) - - logger.info(f"Converted to PIL image with dimensions: {img.size}") - - # Convert to PNG - buffer = BytesIO() - img.save(buffer, format="PNG") - buffer.seek(0) - img_bytes = buffer.getvalue() - - # CRITICAL FIX: We need to send the binary data with a proper node ID prefix - # LoadTensorAPI node expects this header format to identify the target node - # The first 4 bytes are the message type (3 for binary tensor) and the next 4 are the node ID - # Since we don't know the exact node ID, we'll use a generic one that will be interpreted as - # "send this to the currently waiting LoadTensorAPI node" - - # Build header (8 bytes total) - header = bytearray() - # Message type 3 (custom binary tensor data) - header.extend((3).to_bytes(4, byteorder='little')) - # Generic node ID (0 means "send to whatever node is waiting") - header.extend((0).to_bytes(4, byteorder='little')) - - # Combine header and image data - full_data = header + img_bytes - - # Send binary data via websocket - await self.ws.send(full_data) - logger.info(f"Sent tensor as PNG image via websocket with proper header, size: {len(full_data)} bytes, image dimensions: {img.size}") - - except Exception as e: - logger.error(f"Error sending tensor via websocket: {e}") - - # Signal execution complete in case of error - self.execution_complete_event.set() - async def cleanup(self): """Clean up resources""" async with self.cleanup_lock: @@ -642,26 +551,10 @@ async def cleanup_queues(self): logger.info("Tensor queues cleared") - def put_video_input(self, tensor: Union[torch.Tensor, np.ndarray]): - """ - Put a video TENSOR into the tensor cache for processing. - - Args: - tensor: Video frame as a tensor (or numpy array) - """ - try: - # Only remove one frame if the queue is full (like in client.py) - if tensor_cache.image_inputs.full(): - tensor_cache.image_inputs.get_nowait() - - # Ensure tensor is detached if it's a torch tensor - if isinstance(tensor, torch.Tensor): - tensor = tensor.detach() - - tensor_cache.image_inputs.put(tensor) - - except Exception as e: - logger.error(f"Error in put_video_input: {e}") + def put_video_input(self, frame): + if tensor_cache.image_inputs.full(): + tensor_cache.image_inputs.get(block=True) + tensor_cache.image_inputs.put(frame) def put_audio_input(self, frame): """Put audio frame into tensor cache""" @@ -685,44 +578,183 @@ async def get_video_output(self): async def get_audio_output(self): """Get processed audio frame from tensor cache""" return await tensor_cache.audio_outputs.get() + + async def get_available_nodes(self) -> Dict[int, Dict[str, Any]]: + """ + Retrieves detailed information about the nodes used in the current prompts + by querying the ComfyUI /object_info API endpoint. + + Returns: + A dictionary where keys are prompt indices and values are dictionaries + mapping node IDs to their information, matching the required UI format. 
- async def get_available_nodes(self): - """Get metadata and available nodes info for current prompts""" - try: - async with aiohttp.ClientSession() as session: - url = f"{self.api_base_url}/object_info" - async with session.get(url) as response: - if response.status == 200: - data = await response.json() - - # Format node info similar to the embedded client response - all_prompts_nodes_info = {} - - for prompt_index, prompt in enumerate(self.current_prompts): - nodes_info = {} + The idea of this function is to replicate the functionality of comfy embedded client import_all_nodes_in_workspace + TODO: Why not support ckpt_name and lora_name as dropdown selectors on UI? + """ + + if not self.current_prompts: + logger.warning("No current prompts set. Cannot get node info.") + return {} + + all_prompts_nodes_info: Dict[int, Dict[str, Any]] = {} + all_needed_class_types = set() + + # Collect all unique class types across all prompts first + for prompt in self.current_prompts: + for node in prompt.values(): + if isinstance(node, dict) and 'class_type' in node: + all_needed_class_types.add(node['class_type']) + + class_info_cache: Dict[str, Any] = {} + + async with aiohttp.ClientSession() as session: + fetch_tasks = [] + for class_type in all_needed_class_types: + api_url = f"{self.api_base_url}/object_info/{class_type}" + fetch_tasks.append(self._fetch_object_info(session, api_url, class_type)) + + results = await asyncio.gather(*fetch_tasks, return_exceptions=True) + + # Populate cache from results + for result in results: + if isinstance(result, tuple) and len(result) == 2: + class_type, info = result + if info: + class_info_cache[class_type] = info + elif isinstance(result, Exception): + logger.error(f"An exception occurred during object_info fetch task: {result}") + + # Now, build the output structure for each prompt + for prompt_index, prompt in enumerate(self.current_prompts): + nodes_info: Dict[str, Any] = {} + for node_id, node_data in prompt.items(): + if not isinstance(node_data, dict) or 'class_type' not in node_data: + logger.debug(f"Skipping invalid node data for node_id {node_id} in prompt {prompt_index}") + continue + + class_type = node_data['class_type'] + # Let's skip the native api i/o nodes for now, subject to change + if class_type in ['LoadImageBase64', 'SendImageWebsocket']: + continue + + node_info = { + 'class_type': class_type, + 'inputs': {} + } + + specific_class_info = class_info_cache.get(class_type) + + if specific_class_info and 'input' in specific_class_info: + input_definitions = {} + required_inputs = specific_class_info['input'].get('required', {}) + optional_inputs = specific_class_info['input'].get('optional', {}) + + if isinstance(required_inputs, dict): + input_definitions.update(required_inputs) + if isinstance(optional_inputs, dict): + input_definitions.update(optional_inputs) + + if 'inputs' in node_data and isinstance(node_data['inputs'], dict): + for input_name, input_value in node_data['inputs'].items(): + input_def = input_definitions.get(input_name) - for node_id, node in prompt.items(): - class_type = node.get('class_type') - if class_type: - nodes_info[node_id] = { - 'class_type': class_type, - 'inputs': {} - } - - if 'inputs' in node: - for input_name, input_value in node['inputs'].items(): - nodes_info[node_id]['inputs'][input_name] = { - 'value': input_value, - 'type': 'unknown' # We don't have type information - } + # Format the input value as a tuple if it's a list with node references + if isinstance(input_value, list) and len(input_value) 
== 2 and isinstance(input_value[0], str) and isinstance(input_value[1], int): + input_value = tuple(input_value) # Convert [node_id, output_index] to (node_id, output_index) + + # Create Enum-like objects for certain types + def create_enum_format(type_name): + # Format the type as + return f"" - all_prompts_nodes_info[prompt_index] = nodes_info - - return all_prompts_nodes_info - + input_details = { + 'value': input_value, + 'type': 'unknown', # Default type + 'min': None, + 'max': None, + 'widget': None # Default, all widgets should be None to match format + } + + # Parse the definition tuple/list if valid + if isinstance(input_def, (list, tuple)) and len(input_def) > 0: + config = None + # Check for config dict as the second element + if len(input_def) > 1 and isinstance(input_def[1], dict): + config = input_def[1] + + # Check for COMBO type (first element is list/tuple of options) + if input_name in ['ckpt_name', 'lora_name']: + # For checkpoint and lora names, use STRING type instead of combo list + input_details['type'] = create_enum_format('STRING') + elif isinstance(input_def[0], (list, tuple)): + input_details['type'] = input_def[0] # Type is the list of options + # Don't set widget for combo + else: + # Regular type (string or enum) + input_type_raw = input_def[0] + # Keep raw type name for certain types to match format + if hasattr(input_type_raw, 'name'): + # Special handling for CLIP and STRING to match expected format + type_name = str(input_type_raw.name) + if type_name in ('CLIP', 'STRING'): + # Create Enum-like format that matches format in desired output + input_details['type'] = create_enum_format(type_name) + else: + input_details['type'] = type_name + else: + # For non-enum types + input_details['type'] = str(input_type_raw) + + # Extract constraints/widget from config if it exists + if config: + for key in ['min', 'max']: # Only include these, skip widget/step/round + if key in config: + input_details[key] = config[key] + + node_info['inputs'][input_name] = input_details else: - logger.error(f"Error getting node info: {response.status}") - return {} + logger.debug(f"Node {node_id} ({class_type}) has no 'inputs' dictionary.") + elif class_type not in class_info_cache: + logger.warning(f"No cached info found for class_type: {class_type} (node_id: {node_id}).") + else: + logger.debug(f"Class info for {class_type} does not contain an 'input' key.") + # If class info exists but no 'input' key, still add node with empty inputs dict + + nodes_info[node_id] = node_info + + # Only add if there are any nodes after filtering + if nodes_info: + all_prompts_nodes_info[prompt_index] = nodes_info + + return all_prompts_nodes_info + + async def _fetch_object_info(self, session: aiohttp.ClientSession, url: str, class_type: str) -> Optional[tuple[str, Any]]: + """Helper function to fetch object info for a single class type.""" + try: + logger.debug(f"Fetching object info for: {class_type} from {url}") + async with session.get(url) as response: + if response.status == 200: + try: + data = await response.json() + # Extract the actual node info from the nested structure + if class_type in data and isinstance(data[class_type], dict): + node_specific_info = data[class_type] + logger.debug(f"Successfully fetched and extracted info for {class_type}") + return class_type, node_specific_info + else: + logger.error(f"Unexpected response structure for {class_type}. Key missing or not a dict. 
Response: {data}") + + except aiohttp.ContentTypeError: + logger.error(f"Failed to decode JSON for {class_type}. Status: {response.status}, Content-Type: {response.headers.get('Content-Type')}, Response: {await response.text()[:200]}...") # Log beginning of text + except json.JSONDecodeError as e: + logger.error(f"Invalid JSON received for {class_type}. Status: {response.status}, Error: {e}, Response: {await response.text()[:200]}...") + else: + error_text = await response.text() + logger.error(f"Error fetching info for {class_type}: {response.status} - {error_text[:200]}...") + except aiohttp.ClientError as e: + logger.error(f"HTTP client error fetching info for {class_type} ({url}): {e}") except Exception as e: - logger.error(f"Error getting node info: {str(e)}") - return {} \ No newline at end of file + logger.error(f"Unexpected error fetching info for {class_type} ({url}): {e}") + + # Return class_type and None if any error occurred + return class_type, None \ No newline at end of file diff --git a/src/comfystream/utils_api.py b/src/comfystream/utils_api.py index ba31ccd4..dbbb6790 100644 --- a/src/comfystream/utils_api.py +++ b/src/comfystream/utils_api.py @@ -2,9 +2,9 @@ import random from typing import Dict, Any -# from comfy.api.components.schema.prompt import Prompt, PromptDictInput import logging +logger = logging.getLogger(__name__) def create_load_tensor_node(): return { @@ -102,7 +102,7 @@ def convert_prompt(prompt): # Generate a random seed (same range as JavaScript's Math.random() * 18446744073709552000) random_seed = random.randint(0, 18446744073709551615) node["inputs"]["seed"] = random_seed - print(f"Set random seed {random_seed} for node {key}") + logger.debug(f"Set random seed {random_seed} for node {key}") for key, node in prompt.items(): class_type = node.get("class_type") From 13b511a3de0ce48dbadcfb9328452a78e7b4e846 Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 8 Apr 2025 16:39:35 -0400 Subject: [PATCH 14/14] Modified base64 processing to use torchvision instead of numpy intermediate step, moved prompt execution strategy to `execution_start` event, moved buffer to self variable to avoid reinitalization. --- requirements.txt | 1 + src/comfystream/client_api.py | 134 ++++++++++++++++++++-------------- 2 files changed, 80 insertions(+), 55 deletions(-) diff --git a/requirements.txt b/requirements.txt index 24c6c32a..f8d1b46d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ tomli websockets twilio prometheus_client +torchvision diff --git a/src/comfystream/client_api.py b/src/comfystream/client_api.py index f47ebb3f..5dc9ace1 100644 --- a/src/comfystream/client_api.py +++ b/src/comfystream/client_api.py @@ -15,11 +15,12 @@ from comfystream import tensor_cache from comfystream.utils_api import convert_prompt +from torchvision.transforms.functional import to_pil_image logger = logging.getLogger(__name__) class ComfyStreamClient: - def __init__(self, host: str = "127.0.0.1", port: int = 8198, **kwargs): + def __init__(self, host: str = "127.0.0.1", port: int = 8188, **kwargs): """ Initialize the ComfyStream client to use the ComfyUI API. 
@@ -38,11 +39,10 @@ def __init__(self, host: str = "127.0.0.1", port: int = 8198, **kwargs): self.current_prompts = [] self.running_prompts = {} self.cleanup_lock = asyncio.Lock() - + self.buffer = BytesIO() # WebSocket connection self._ws_listener_task = None self.execution_complete_event = asyncio.Event() - self.execution_started = False self._prompt_id = None # Add frame tracking @@ -219,11 +219,22 @@ async def _handle_text_message(self, message: str): data = json.loads(message) message_type = data.get("type", "unknown") - logger.debug(f"Received message type: {message_type}") - logger.debug(f"{data}") - + # logger.info(f"Received message type: {message_type}") + logger.info(f"{data}") + + # Example output + ''' + 15:15:58 [INFO] Received message type: executing + 15:15:58 [INFO] {'type': 'executing', 'data': {'node': '18', 'display_node': '18', 'prompt_id': '6f983049-dca4-4935-9f36-d2bff7b744fa'}} + 15:15:58 [INFO] Received message type: executed + 15:15:58 [INFO] {'type': 'executed', 'data': {'node': '18', 'display_node': '18', 'output': {'images': [{'source': 'websocket', 'content-type': 'image/png', 'type': 'output'}]}, 'prompt_id': '6f983049-dca4-4935-9f36-d2bff7b744fa'}} + 15:15:58 [INFO] Received message type: execution_success + 15:15:58 [INFO] {'type': 'execution_success', 'data': {'prompt_id': '6f983049-dca4-4935-9f36-d2bff7b744fa', 'timestamp': 1744139758250}} + ''' + + # Handle different message types to have fun with! + ''' - # Handle different message types if message_type == "status": # Status message with comfy_ui's queue information queue_remaining = data.get("data", {}).get("queue_remaining", 0) @@ -232,56 +243,60 @@ async def _handle_text_message(self, message: str): logger.info("Queue empty, no active execution") else: logger.info(f"Queue status: {queue_remaining} items remaining") - - elif message_type == "progress": + ''' + + ''' + if message_type == "progress": if "data" in data and "value" in data["data"]: progress = data["data"]["value"] max_value = data["data"].get("max", 100) # Log the progress for debugging logger.info(f"Progress: {progress}/{max_value}") - - elif message_type == "execution_start": - self.execution_started = True + ''' + + if message_type == "execution_start": if "data" in data and "prompt_id" in data["data"]: self._prompt_id = data["data"]["prompt_id"] logger.info(f"Execution started for prompt {self._prompt_id}") - - elif message_type == "executing": - self.execution_started = True + + # Let's queue the next prompt here! 
+ self.execution_complete_event.set() + + ''' + if message_type == "executing": if "data" in data: if "prompt_id" in data["data"]: self._prompt_id = data["data"]["prompt_id"] if "node" in data["data"]: node_id = data["data"]["node"] logger.info(f"Executing node: {node_id}") - - elif message_type in ["execution_cached", "execution_error", "execution_complete", "execution_interrupted"]: - logger.info(f"{message_type} message received for prompt {self._prompt_id}") - # self.execution_started = False - + + # Let's check which node_id is a LoadImageBase64 node + # and set the execution complete event for that node + for prompt_index, prompt in enumerate(self.current_prompts): + for node_id, node in prompt.items(): + if (node_id == executing_node_id and isinstance(node, dict) and node.get("class_type") in ["LoadImageBase64"]): + logger.info(f"Setting execution complete event for LoadImageBase64 node {node_id}") + self.execution_complete_event.set() + break + ''' + + ''' + if message_type == "executed": + # This is sent when a node is completely done + if "data" in data and "node" in data["data"]: + node_id = data["data"]["node"] + logger.info(f"Node execution complete: {node_id}") + ''' + + ''' + if message_type in ["execution_cached", "execution_error", "execution_complete", "execution_interrupted"]: + logger.info(f"{message_type} message received for prompt {self._prompt_id}") # Always signal completion for these terminal states # self.execution_complete_event.set() logger.info(f"Set execution_complete_event from {message_type}") pass ''' - - if message_type == "executed": - # This is sent when a node is completely done - if "data" in data and "node_id" in data["data"]: - node_id = data["data"]["node_id"] - logger.info(f"Node execution complete: {node_id}") - - # Check if this is our SaveTensorAPI node - if "SaveTensorAPI" in str(node_id): - logger.info("SaveTensorAPI node executed, checking for tensor data") - # The binary data should come separately via websocket - - # If we've been running for too long without tensor data, force completion - elif self.execution_started and not self.execution_complete_event.is_set(): - # Check if this was the last node - if data.get("data", {}).get("remaining", 0) == 0: - # self.execution_complete_event.set() - pass except json.JSONDecodeError: logger.error(f"Invalid JSON message: {message[:100]}...") @@ -295,7 +310,7 @@ async def _handle_binary_message(self, binary_data): try: # Early return if message is too short if len(binary_data) <= 8: - self.execution_complete_event.set() + # self.execution_complete_event.set() return # Extract header data only when needed @@ -306,7 +321,7 @@ async def _handle_binary_message(self, binary_data): # Quick check for image format is_image = data[:2] in [b'\xff\xd8', b'\x89\x50'] if not is_image: - self.execution_complete_event.set() + # self.execution_complete_event.set() return # Process image data directly @@ -335,15 +350,16 @@ async def _handle_binary_message(self, binary_data): tensor_cache.image_outputs.put_nowait(tensor) logger.debug("Added tensor without frame_id to output queue") - self.execution_complete_event.set() + # We will execute the next prompt from message_type == "execution_start" instead + # self.execution_complete_event.set() except Exception as img_error: logger.error(f"Error processing image: {img_error}") - self.execution_complete_event.set() + # self.execution_complete_event.set() except Exception as e: logger.error(f"Error handling binary message: {e}") - self.execution_complete_event.set() + # 
self.execution_complete_event.set() async def _execute_prompt(self, prompt_index: int): try: @@ -430,25 +446,35 @@ async def _execute_prompt(self, prompt_index: int): tensor, size=(512, 512), mode='bilinear', align_corners=False ) tensor = tensor[0] # Remove batch dimension - + + # ==== + # PIL method + ''' # Direct conversion to PIL without intermediate numpy step for speed tensor_np = (tensor.permute(1, 2, 0).clamp(0, 1) * 255).to(torch.uint8).numpy() img = Image.fromarray(tensor_np) + img.save(self.buffer, format="JPEG", quality=90, optimize=True) + ''' - # Fast JPEG encoding with balanced quality - buffer = BytesIO() - img.save(buffer, format="JPEG", quality=90, optimize=True) - buffer.seek(0) - img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8') + # ==== + # torchvision method (more performant - TODO: need to test further) + # Direct conversion to PIL without intermediate numpy step + # Fast JPEG encoding with reduced quality for better performance + tensor_pil = to_pil_image(tensor.clamp(0, 1)) + tensor_pil.save(self.buffer, format="JPEG", quality=75, optimize=True) + # ==== + + self.buffer.seek(0) + img_base64 = base64.b64encode(self.buffer.getvalue()).decode('utf-8') except Exception as e: logger.warning(f"Error in tensor processing: {e}, creating fallback image") # Create a standard 512x512 placeholder if anything fails img = Image.new('RGB', (512, 512), color=(100, 149, 237)) - buffer = BytesIO() - img.save(buffer, format="JPEG", quality=90) - buffer.seek(0) - img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8') + self.buffer = BytesIO() + img.save(self.buffer, format="JPEG", quality=90) + self.buffer.seek(0) + img_base64 = base64.b64encode(self.buffer.getvalue()).decode('utf-8') # Add timestamp for cache busting (once, outside the try/except) timestamp = int(time.time() * 1000) @@ -482,8 +508,6 @@ async def _execute_prompt(self, prompt_index: int): if frame_id is not None: self._frame_id_mapping[self._prompt_id] = frame_id logger.info(f"Mapped prompt_id {self._prompt_id} to frame_id {frame_id}") - - self.execution_started = True else: error_text = await response.text() logger.error(f"Error queueing prompt: {response.status} - {error_text}")
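
Note for reviewers (not part of the patch series): the round trip that client_api.py implements against the ComfyUI HTTP/WebSocket API can be exercised with a small standalone script. The sketch below is illustrative only; it assumes a ComfyUI server on 127.0.0.1:8188 with the LoadImageBase64 and SendImageWebsocket nodes installed, an API-format workflow saved as workflow_api.json whose node "1" is the LoadImageBase64 node, and it treats the first binary WebSocket frame (8-byte header followed by PNG/JPEG bytes) as the processed output.

# Standalone round-trip sketch (illustrative, not part of the patch series).
# Assumptions: ComfyUI at 127.0.0.1:8188, the LoadImageBase64 / SendImageWebsocket
# nodes installed, and an API-format workflow saved as workflow_api.json whose
# node "1" is the LoadImageBase64 node.
import asyncio
import base64
import json
import uuid
from io import BytesIO

import aiohttp
import websockets
from PIL import Image

HOST, PORT = "127.0.0.1", 8188
CLIENT_ID = str(uuid.uuid4())

async def main():
    with open("workflow_api.json") as f:
        prompt = json.load(f)

    # Encode one input frame as base64 JPEG, mirroring what _execute_prompt does.
    frame = Image.new("RGB", (512, 512), color=(100, 149, 237))
    buf = BytesIO()
    frame.save(buf, format="JPEG", quality=90)
    prompt["1"]["inputs"]["image"] = base64.b64encode(buf.getvalue()).decode("utf-8")

    async with websockets.connect(f"ws://{HOST}:{PORT}/ws?clientId={CLIENT_ID}") as ws:
        # Queue the prompt over the HTTP API and remember its prompt_id.
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"http://{HOST}:{PORT}/prompt",
                json={"prompt": prompt, "client_id": CLIENT_ID},
            ) as resp:
                prompt_id = (await resp.json())["prompt_id"]
                print("queued prompt", prompt_id)

        # Read WebSocket traffic until the processed image arrives.
        while True:
            msg = await ws.recv()
            if isinstance(msg, (bytes, bytearray)):
                # Binary frames carry an 8-byte header followed by PNG/JPEG bytes.
                Image.open(BytesIO(msg[8:])).save("output.png")
                break
            event = json.loads(msg)
            if event.get("type") == "execution_error":
                raise RuntimeError(event["data"])

asyncio.run(main())

In the client itself, the prompt_id returned by /prompt is mapped to the originating frame_id (_frame_id_mapping) so results arriving over the WebSocket can be matched back to their source frames; the sketch above skips that bookkeeping since it only sends a single frame.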