This node connects to OpenAI's GPT Image 1 API, letting users generate images from detailed text prompts. Unlike the earlier DALL·E models, GPT Image 1 leverages GPT-4's language understanding, so it can handle more complex, context-rich prompts and produce images that better match the user's intent.
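
For reference, the same model can also be called directly through OpenAI's official Python SDK outside of ComfyUI. A minimal sketch, assuming the `openai` package (v1+) is installed and `OPENAI_API_KEY` is set in the environment:

```python
import base64
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

result = client.images.generate(
    model="gpt-image-1",
    prompt="A watercolor fox reading a book under a paper lantern",
    size="1024x1024",
    quality="low",
    n=1,
)

# GPT Image 1 returns images as base64-encoded data.
with open("output.png", "wb") as f:
    f.write(base64.b64decode(result.data[0].b64_json))
```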

## Parameters

### Basic Parameters

| Parameter | Type | Default | Description |
| --------- | ---- | ------- | ----------- |
| prompt | string | "" | Detailed text prompt describing the content to generate |
| quality | combo | "low" | Image quality level; options: "low", "medium", "high" |
| size | combo | "auto" | Output image size; options: "auto", "1024x1024", "1024x1536", "1536x1024" |

### Image Editing Parameters

| Parameter | Type | Description |
| --------- | ---- | ----------- |
| image | image | Input image(s) for image editing; batched input of multiple images is supported |
| mask | mask | Specifies the region of the image to modify (optional); when a mask is used, only a single input image is allowed |

### Optional Parameters

| Parameter | Type | Description |
| --------- | ---- | ----------- |
| background | combo | Background handling; options: "opaque", "transparent" |
| seed | integer | Random seed for generation; not yet implemented in the backend |
| n | integer | Number of images to generate, from 1 to 8 |
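
Together, these parameters map onto the request body the node builds (via its `OpenAIImageGenerationRequest` model). A hypothetical example of the resulting payload, expressed as a Python dict:

```python
# Hypothetical payload; field names mirror the parameters documented above.
payload = {
    "model": "gpt-image-1",
    "prompt": "A cozy cabin in a snowy forest at dusk",
    "quality": "high",
    "background": "opaque",
    "size": "1536x1024",
    "n": 2,
    "seed": 0,  # accepted by the node but not yet implemented in the backend
}
```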

## Output

| Output | Type | Description |
| ------ | ---- | ----------- |
| IMAGE | image | The generated image result |
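
In ComfyUI, an IMAGE output is a float tensor with values in [0, 1] and shape `[batch, height, width, channels]`. The node's `validate_and_cast_response` helper (not shown in the listing below) performs this conversion; a minimal sketch of the idea, assuming a base64-encoded PNG from the API:

```python
import base64
import io

import numpy as np
import torch
from PIL import Image

def b64_png_to_image_tensor(b64_data: str) -> torch.Tensor:
    """Decode one base64 PNG into a [1, H, W, C] float tensor in [0, 1]."""
    img = Image.open(io.BytesIO(base64.b64decode(b64_data))).convert("RGB")
    arr = np.asarray(img).astype(np.float32) / 255.0
    return torch.from_numpy(arr).unsqueeze(0)
```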

## How It Works

The OpenAI GPT Image 1 node combines GPT-4's language understanding with image generation. It first analyzes the user's text prompt to understand its semantics and intent, then generates an image that matches the description.

When an input image is provided, the node switches to image-editing mode, allowing an existing image to be modified. By additionally supplying a mask, users can precisely control which regions should be changed and which should remain untouched. Note that when a mask is used, only a single input image may be provided.
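
The equivalent direct call in OpenAI's Python SDK is `client.images.edit`; a minimal sketch, assuming local `photo.png` and `mask.png` files where transparent mask pixels mark the area to repaint:

```python
from openai import OpenAI

client = OpenAI()

result = client.images.edit(
    model="gpt-image-1",
    image=open("photo.png", "rb"),
    mask=open("mask.png", "rb"),  # transparent pixels = regions to replace
    prompt="Replace the sky with a dramatic sunset",
)
```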

Users can control the generated result by adjusting parameters such as the quality level, size, background handling, and number of images.

## Source Code Reference

[Node source code (updated 2025-05-03)]


```python
from inspect import cleandoc
import io

import numpy as np
import torch
from PIL import Image

# Import paths follow the ComfyUI repository layout; downscale_image_tensor
# and validate_and_cast_response are helpers defined alongside this node.
from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeDict
from comfy_api_nodes.apis import (
    OpenAIImageEditRequest,
    OpenAIImageGenerationRequest,
    OpenAIImageGenerationResponse,
)
from comfy_api_nodes.apis.client import ApiEndpoint, HttpMethod, SynchronousOperation


class OpenAIGPTImage1(ComfyNodeABC):
    """
    Generates images synchronously via OpenAI's GPT Image 1 endpoint.

    Uses the proxy at /proxy/openai/images/generations. Returned URLs are short‑lived,
    so download or cache results if you need to keep them.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        return {
            "required": {
                "prompt": (
                    IO.STRING,
                    {
                        "multiline": True,
                        "default": "",
                        "tooltip": "Text prompt for GPT Image 1",
                    },
                ),
            },
            "optional": {
                "seed": (
                    IO.INT,
                    {
                        "default": 0,
                        "min": 0,
                        "max": 2**31 - 1,
                        "step": 1,
                        "display": "number",
                        "control_after_generate": True,
                        "tooltip": "not implemented yet in backend",
                    },
                ),
                "quality": (
                    IO.COMBO,
                    {
                        "options": ["low", "medium", "high"],
                        "default": "low",
                        "tooltip": "Image quality, affects cost and generation time.",
                    },
                ),
                "background": (
                    IO.COMBO,
                    {
                        "options": ["opaque", "transparent"],
                        "default": "opaque",
                        "tooltip": "Return image with or without background",
                    },
                ),
                "size": (
                    IO.COMBO,
                    {
                        "options": ["auto", "1024x1024", "1024x1536", "1536x1024"],
                        "default": "auto",
                        "tooltip": "Image size",
                    },
                ),
                "n": (
                    IO.INT,
                    {
                        "default": 1,
                        "min": 1,
                        "max": 8,
                        "step": 1,
                        "display": "number",
                        "tooltip": "How many images to generate",
                    },
                ),
                "image": (
                    IO.IMAGE,
                    {
                        "default": None,
                        "tooltip": "Optional reference image for image editing.",
                    },
                ),
                "mask": (
                    IO.MASK,
                    {
                        "default": None,
                        "tooltip": "Optional mask for inpainting (white areas will be replaced)",
                    },
                ),
            },
            "hidden": {"auth_token": "AUTH_TOKEN_COMFY_ORG"},
        }

    RETURN_TYPES = (IO.IMAGE,)
    FUNCTION = "api_call"
    CATEGORY = "api node/image/openai"
    DESCRIPTION = cleandoc(__doc__ or "")
    API_NODE = True

    def api_call(
        self,
        prompt,
        seed=0,
        quality="low",
        background="opaque",
        image=None,
        mask=None,
        n=1,
        size="1024x1024",
        auth_token=None,
    ):
        model = "gpt-image-1"
        path = "/proxy/openai/images/generations"
        content_type = "application/json"
        request_class = OpenAIImageGenerationRequest
        img_binaries = []
        mask_binary = None
        files = []

        if image is not None:
            # An input image switches the node to the edits endpoint, and the
            # payload is sent as multipart form data instead of JSON.
            path = "/proxy/openai/images/edits"
            request_class = OpenAIImageEditRequest
            content_type = "multipart/form-data"

            batch_size = image.shape[0]

            # Encode each image in the batch as an in-memory PNG. A single
            # image is uploaded under the field name "image", a batch under
            # "image[]".
            for i in range(batch_size):
                single_image = image[i : i + 1]
                scaled_image = downscale_image_tensor(single_image).squeeze()

                image_np = (scaled_image.numpy() * 255).astype(np.uint8)
                img = Image.fromarray(image_np)
                img_byte_arr = io.BytesIO()
                img.save(img_byte_arr, format="PNG")
                img_byte_arr.seek(0)
                img_binary = img_byte_arr
                img_binary.name = f"image_{i}.png"

                img_binaries.append(img_binary)
                if batch_size == 1:
                    files.append(("image", img_binary))
                else:
                    files.append(("image[]", img_binary))

        if mask is not None:
            if image is None:
                raise Exception("Cannot use a mask without an input image")
            if image.shape[0] != 1:
                raise Exception("Cannot use a mask with multiple images")
            if mask.shape[1:] != image.shape[1:-1]:
                raise Exception("Mask and Image must be the same size")
            # The edits endpoint expects an RGBA mask whose transparent
            # (alpha = 0) pixels mark the regions to repaint, so the ComfyUI
            # mask (1 = area to replace) is inverted into the alpha channel.
            batch, height, width = mask.shape
            rgba_mask = torch.zeros(height, width, 4, device="cpu")
            rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu()

            scaled_mask = downscale_image_tensor(rgba_mask.unsqueeze(0)).squeeze()

            mask_np = (scaled_mask.numpy() * 255).astype(np.uint8)
            mask_img = Image.fromarray(mask_np)
            mask_img_byte_arr = io.BytesIO()
            mask_img.save(mask_img_byte_arr, format="PNG")
            mask_img_byte_arr.seek(0)
            mask_binary = mask_img_byte_arr
            mask_binary.name = "mask.png"
            files.append(("mask", mask_binary))

        # Build the operation
        operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=path,
                method=HttpMethod.POST,
                request_model=request_class,
                response_model=OpenAIImageGenerationResponse,
            ),
            request=request_class(
                model=model,
                prompt=prompt,
                quality=quality,
                background=background,
                n=n,
                seed=seed,
                size=size,
            ),
            files=files if files else None,
            content_type=content_type,
            auth_token=auth_token,
        )

        response = operation.execute()

        # Convert the API response into a ComfyUI image tensor.
        img_tensor = validate_and_cast_response(response)
        return (img_tensor,)
```