# Generate Studio Video

Generates videos using the AI Studio backend with support for avatars, voices, and dynamic backgrounds. You can create videos using either your photo avatar or digital twin. This endpoint supports Avatar III and Avatar IV.

<Callout icon="📘" theme="info">
  **Note:**

  * Scroll down to the **Responses** section below and expand the status code(s) to view the detailed response schema.
  * Refer to the **RESPONSE** section on the right and choose an example to see how the API response appears for each status code.
</Callout>

# OpenAPI definition

```json
{
  "openapi": "3.1.0",
  "info": {
    "title": "heygen-api",
    "version": "4.0.8"
  },
  "servers": [
    {
      "url": "https://api.heygen.com"
    }
  ],
  "components": {
    "securitySchemes": {
      "sec0": {
        "type": "apiKey",
        "name": "x-api-key",
        "in": "header",
        "x-default": "<your-api-key>"
      }
    }
  },
  "security": [
    {
      "sec0": []
    }
  ],
  "paths": {
    "/v2/video/generate": {
      "post": {
        "summary": "Create Avatar Video (V2)",
        "description": "This API now generates videos with our new AI Studio backend.",
        "operationId": "create-an-avatar-video-v2",
        "requestBody": {
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "required": [
                  "video_inputs"
                ],
                "properties": {
                  "caption": {
                    "type": "boolean",
                    "description": "Whether to enable captions in the video. Only supported for text-based input.",
                    "default": false
                  },
                  "title": {
                    "type": "string",
                    "description": "Title of this video"
                  },
                  "callback_id": {
                    "type": "string",
                    "description": "Custom ID for callback purposes. Returned in the status/webhook payload for tracking."
                  },
                  "video_inputs": {
                    "type": "array",
                    "items": {
                      "properties": {
                        "character": {
                          "type": "object",
                          "description": "Could be an `avatar` or `talking photo`.",
                          "properties": {
                            "type": {
                              "type": "string",
                              "description": "Could be either `avatar` or `talking_photo` (for photo avatars).",
                              "default": "avatar",
                              "enum": [
                                "avatar",
                                "talking_photo"
                              ]
                            },
                            "avatar_id": {
                              "type": "string",
                              "description": "Unique identifier of the avatar. Can be retrieved from <a href=\"https://docs.heygen.com/reference/list-avatars-v2\" target=\"_blank\">List All Avatars (V2)</a> endpoint. "
                            },
                            "talking_photo_id": {
                              "type": "string",
                              "description": "Unique identifier of the talking photo. Can be retrieved from <a href=\"https://docs.heygen.com/reference/list-avatars-v2\" target=\"_blank\">List All Avatars (V2)</a> endpoint.  "
                            },
                            "scale": {
                              "type": "number",
                              "description": "Adjusts the size of the avatar or talking photo. Value ranges from `0.0` - `5.0`. \nUse the <a href=\"https://docs.heygen.com/page/tools-1\" target=\"_blank\">Avatar Positioning</a> tool for easier adjustment. _Applies to both `avatar` and `talking_photo` type._",
                              "default": 1,
                              "format": "float"
                            },
                            "avatar_style": {
                              "type": "string",
                              "description": "Visual style of the avatar. _Applies only to `avatar` type._",
                              "default": "normal",
                              "enum": [
                                "circle",
                                "closeUp",
                                "normal"
                              ]
                            },
                            "talking_photo_style": {
                              "type": "string",
                              "description": "Visual style of the talking photo. _Applies only to `talking_photo` type._ ",
                              "enum": [
                                "circle"
                              ]
                            },
                            "use_avatar_iv_model": {
                              "type": "boolean",
                              "description": "Whether to use the Avatar IV model."
                            },
                            "prompt": {
                              "type": "string",
                              "description": "Avatar IV motion prompt. _Applies to `talking_photo` type and only when `use_avatar_iv_model` is true._"
                            },
                            "keep_original_prompt": {
                              "type": "boolean",
                              "description": "Whether to preserve the Avatar IV motion prompt as is or allow prompt enhancement. If this value is false or not provided, the motion prompt will be enhanced by default. _Applies to `talking_photo` type and only when `use_avatar_iv_model` is true._"
                            },
                            "offset": {
                              "type": "object",
                              "description": "Specifies the horizontal (x) and vertical (y) position adjustment of the avatar or talking photo within the video frame. Default is { \"x\": 0.0, \"y\": 0.0 }, which centers the character. \nUse the <a href=\"https://docs.heygen.com/page/tools-1\" target=\"_blank\">Avatar Positioning</a> tool for easier adjustment. _Applies to both `avatar` and `talking_photo` type._",
                              "properties": {
                                "x": {
                                  "type": "number",
                                  "format": "float",
                                  "description": "Horizontal position."
                                },
                                "y": {
                                  "type": "number",
                                  "format": "float",
                                  "description": "Vertical position. "
                                }
                              }
                            },
                            "talking_style": {
                              "type": "string",
                              "description": "Supported talking style of the Talking Photo. _Applies only to the `talking_photo` type._",
                              "enum": [
                                "stable",
                                "expressive"
                              ],
                              "default": "stable"
                            },
                            "expression": {
                              "type": "string",
                              "description": "Supported expressions of the talking photo. _Applies only to the `talking_photo` type._",
                              "enum": [
                                "default",
                                "happy"
                              ]
                            },
                            "super_resolution": {
                              "type": "boolean",
                              "description": "Whether to enhance the image quality. _Applies only to the `talking_photo` type._"
                            },
                            "matting": {
                              "type": "boolean",
                              "description": "Whether to remove the photo background. _Applies to both `avatar` and `talking_photo` type._"
                            },
                            "circle_background_color": {
                              "type": "string",
                              "description": "Background color when using the circle style. Must be specified in hex format (e.g., <code>#FFFFFF</code>). _Applies to both `avatar` and `talking_photo` type._"
                            }
                          },
                          "required": [
                            "type"
                          ],
                          "oneOf": [
                            {
                              "properties": {
                                "type": {
                                  "const": "avatar"
                                }
                              },
                              "required": [
                                "avatar_id"
                              ]
                            },
                            {
                              "properties": {
                                "type": {
                                  "const": "talking_photo"
                                }
                              },
                              "required": [
                                "talking_photo_id"
                              ]
                            }
                          ]
                        },
                        "voice": {
                          "type": "object",
                          "description": "Could be text, audio, or silence. ",
                          "properties": {
                            "type": {
                              "type": "string",
                              "description": "Format of the voice input. ",
                              "enum": [
                                "text",
                                "audio",
                                "silence"
                              ]
                            },
                            "voice_id": {
                              "type": "string",
                              "description": "Unique identifier of the voice. Can be retrieved from <a href=\"https://docs.heygen.com/reference/list-voices-v2\" target=\"_blank\">List All Voices (V2)</a> endpoint. _Applies only to the `text` type._ "
                            },
                            "input_text": {
                              "type": "string",
                              "description": "Text that the avatar/talking photo will speak. _Applies only to the `text` type._  "
                            },
                            "speed": {
                              "type": "number",
                              "description": "Voice speed, value ranges from `0.5` to `1.5`. _Applies only to `text` type._",
                              "format": "float",
                              "default": 1
                            },
                            "pitch": {
                              "type": "integer",
                              "description": "Voice pitch, value ranges from `-50` to `50`. _Applies only to `text` type._",
                              "format": "int32",
                              "default": 0
                            },
                            "emotion": {
                              "type": "string",
                              "description": "Adds emotion to voice, if supported. _Applies only to `text` type._",
                              "enum": [
                                "Excited",
                                "Friendly",
                                "Serious",
                                "Soothing",
                                "Broadcaster"
                              ]
                            },
                            "locale": {
                              "type": "string",
                              "description": "Voice accents/locales for multilingual voices (e.g., en-US, en-IN, pt-PT, pt-BR). Can be retrieved from <a href=\"https://docs.heygen.com/reference/list-all-locales-for-voices\" target=\"_blank\">List All Locales for Voices</a> endpoint. _Applies only to `text` type._"
                            },
                            "elevenlabs_settings": {
                              "type": "object",
                              "description": "Advanced voice customization via ElevenLabs. _Applies only to `text` type._",
                              "properties": {
                                "model": {
                                  "type": "string",
                                  "description": "The ElevenLabs model to use. ",
                                  "enum": [
                                    "eleven_monolingual_v1",
                                    "eleven_multilingual_v1",
                                    "eleven_multilingual_v2",
                                    "eleven_turbo_v2",
                                    "eleven_turbo_v2_5",
                                    "eleven_v3"
                                  ]
                                },
                                "similarity_boost": {
                                  "type": "number",
                                  "description": "Controls how similar the generated speech should be to the original voice. Value ranges from `0.0` to `1.0`.",
                                  "format": "float"
                                },
                                "stability": {
                                  "type": "number",
                                  "description": "Controls the stability of the voice generation. Higher values result in more consistent and stable output. Value ranges from `0.0` to `1.0`. Note: for the `eleven_v3` model, the default value is `1.0` and the allowed values are `0`, `0.5`, and `1.0`.",
                                  "format": "float"
                                },
                                "style": {
                                  "type": "number",
                                  "format": "float",
                                  "description": "Controls the style intensity of the generated speech. Value ranges from `0.0` to `1.0`. "
                                }
                              }
                            },
                            "audio_url": {
                              "type": "string",
                              "description": "URL of the uploaded audio. Can be retrieved from <a href=\"https://docs.heygen.com/reference/upload-asset\" target=\"_blank\">Upload Asset</a> endpoint. Either `audio_url` or `audio_asset_id` must be provided. Omitting both or defining both will result in an error. _Applies only to `audio` type._"
                            },
                            "audio_asset_id": {
                              "type": "string",
                              "description": "Unique identifier of the uploaded audio. Can be retrieved from <a href=\"https://docs.heygen.com/reference/upload-asset\" target=\"_blank\">Upload Asset</a> endpoint. Either `audio_url` or `audio_asset_id` must be provided. Omitting both or defining both will result in an error. _Applies only to `audio` type._"
                            },
                            "duration": {
                              "type": "string",
                              "default": "1",
                              "description": "Duration of silence. Value ranges from `1.0` to `100.0`. _Applies only to `silence` type._"
                            }
                          },
                          "required": [
                            "type"
                          ],
                          "oneOf": [
                            {
                              "properties": {
                                "type": {
                                  "const": "text"
                                }
                              },
                              "required": [
                                "voice_id",
                                "input_text"
                              ]
                            },
                            {
                              "properties": {
                                "type": {
                                  "const": "audio"
                                }
                              },
                              "oneOf": [
                                {
                                  "required": [
                                    "audio_url"
                                  ]
                                },
                                {
                                  "required": [
                                    "audio_asset_id"
                                  ]
                                }
                              ]
                            },
                            {
                              "properties": {
                                "type": {
                                  "const": "silence"
                                }
                              },
                              "required": [
                                "duration"
                              ]
                            }
                          ]
                        },
                        "background": {
                          "type": "object",
                          "description": "Could be color, image, or video. ",
                          "properties": {
                            "type": {
                              "type": "string",
                              "description": "Specifies the background type. ",
                              "enum": [
                                "color",
                                "image",
                                "video"
                              ]
                            },
                            "value": {
                              "type": "string",
                              "description": "Hex colour code for the background. _Applies only to `color` type._",
                              "default": "#FFFFFF"
                            },
                            "url": {
                              "type": "string",
                              "description": "URL of the uploaded image/video. Can be retrieved from the response of <a href=\"https://docs.heygen.com/reference/upload-asset\" target=\"_blank\">Upload Asset</a> endpoint.  _Applies only to `image` or `video` type._"
                            },
                            "image_asset_id": {
                              "type": "string",
                              "description": "Unique identifier of the uploaded image. Can be retrieved from the response of <a href=\"https://docs.heygen.com/reference/upload-asset\" target=\"_blank\">Upload Asset</a> endpoint. Either `url` or `image_asset_id` must be provided. Omitting both or defining both will result in an error. _Applies only to `image` type._"
                            },
                            "video_asset_id": {
                              "type": "string",
                              "description": "Unique identifier of the uploaded video. Can be retrieved from the response of <a href=\"https://docs.heygen.com/reference/upload-asset\" target=\"_blank\">Upload Asset</a> endpoint. Either `url` or `video_asset_id` must be provided. Omitting both or defining both will result in an error. _Applies only to `video` type._"
                            },
                            "play_style": {
                              "type": "string",
                              "description": "Playback mode. Refer to <a href=\"https://docs.heygen.com/docs/video-playback-styles-in-template\" target=\"_blank\">Video Playback Styles</a> for more details.",
                              "enum": [
                                "freeze",
                                "loop",
                                "fit_to_scene"
                              ]
                            },
                            "fit": {
                              "type": "string",
                              "description": "Choose how background video fits to the screen. ",
                              "enum": [
                                "crop",
                                "cover",
                                "contain",
                                "none"
                              ],
                              "default": "cover"
                            }
                          },
                          "required": [
                            "type"
                          ],
                          "oneOf": [
                            {
                              "properties": {
                                "type": {
                                  "const": "color"
                                }
                              },
                              "required": [
                                "value"
                              ]
                            },
                            {
                              "properties": {
                                "type": {
                                  "const": "image"
                                }
                              },
                              "oneOf": [
                                {
                                  "required": [
                                    "url"
                                  ]
                                },
                                {
                                  "required": [
                                    "image_asset_id"
                                  ]
                                }
                              ]
                            },
                            {
                              "properties": {
                                "type": {
                                  "const": "video"
                                }
                              },
                              "required": [
                                "play_style"
                              ],
                              "oneOf": [
                                {
                                  "required": [
                                    "url"
                                  ]
                                },
                                {
                                  "required": [
                                    "video_asset_id"
                                  ]
                                }
                              ]
                            }
                          ]
                        },
                        "text": {
                          "type": "object",
                          "properties": {
                            "type": {
                              "type": "string",
                              "description": "Specifies the element type. Must be \"text\".",
                              "enum": [
                                "text"
                              ]
                            },
                            "text": {
                              "type": "string",
                              "description": "The actual text content to be displayed in the video. "
                            },
                            "font_family": {
                              "type": "string",
                              "description": "Font family name (e.g., \"Arial\")."
                            },
                            "font_size": {
                              "type": "number",
                              "description": "Font size in points. Must be greater than 0.",
                              "format": "float"
                            },
                            "font_weight": {
                              "type": "string",
                              "description": "Font weight.",
                              "enum": [
                                "bold"
                              ]
                            },
                            "color": {
                              "type": "string",
                              "description": "Text color in hex format (e.g., `#FFFFFF`)."
                            },
                            "position": {
                              "type": "object",
                              "properties": {
                                "x": {
                                  "type": "number",
                                  "format": "float",
                                  "description": "Horizontal position."
                                },
                                "y": {
                                  "type": "number",
                                  "format": "float",
                                  "description": "Vertical position. "
                                }
                              },
                              "description": "Specifies where text is placed on screen. "
                            },
                            "text_align": {
                              "type": "string",
                              "enum": [
                                "left",
                                "center",
                                "right"
                              ],
                              "description": "Text alignment within its container. "
                            },
                            "line_height": {
                              "type": "number",
                              "description": "Line height (spacing between lines). Must be greater than 0.0.",
                              "format": "float"
                            },
                            "width": {
                              "type": "number"
                            }
                          },
                          "description": "Text displayed on the screen.",
                          "required": [
                            "type",
                            "text",
                            "line_height"
                          ]
                        }
                      },
                      "type": "object"
                    },
                    "description": "Array of video input settings (scenes). Must contain between 1 and 50 items. A video input describes the avatar, background, voice, and script, which together make up a 'scene'."
                  },
                  "dimension": {
                    "type": "object",
                    "description": "Custom dimensions for the output video.",
                    "properties": {
                      "width": {
                        "type": "integer",
                        "default": 1920,
                        "format": "int32",
                        "description": "Width of the output video. "
                      },
                      "height": {
                        "type": "integer",
                        "default": 1080,
                        "format": "int32",
                        "description": "Height of the output video. "
                      }
                    }
                  },
                  "folder_id": {
                    "type": "string",
                    "description": "Unique identifier of the folder where the video is stored. Can be retrieved from the <a href=\"https://docs.heygen.com/reference/list-folders\" target=\"_blank\">List Folders</a> endpoint if folder already exists, or from the response of the <a href=\"https://docs.heygen.com/reference/create-folder\" target=\"_blank\">Create Folder</a> endpoint after creating a new folder."
                  },
                  "callback_url": {
                    "type": "string",
                    "description": "URL to notify when video rendering is complete, useful when your callback endpoint is dynamic and each video requires a separate callback. Using a webhook endpoint is still the recommended approach, as it provides more customization options such as secrets, event filtering, and more. If both `webhook` and `callback_url` are used, events will be sent to both endpoints."
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Video creation initiated successfully - Expand to view the detailed response schema.",
            "content": {
              "application/json": {
                "examples": {
                  "Result": {
                    "value": {
                      "error": null,
                      "data": {
                        "video_id": "af273759c9xa47369e05418c69drq174"
                      }
                    }
                  }
                },
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": [
                        "string",
                        "null"
                      ],
                      "description": "Displays error message when the request fails; null if the request is successful."
                    },
                    "data": {
                      "type": "object",
                      "properties": {
                        "video_id": {
                          "type": "string",
                          "description": "Unique identifier of the generated video. "
                        }
                      },
                      "description": "Contains the video ID. "
                    }
                  }
                }
              }
            }
          },
          "400": {
            "description": "<ul><li>Invalid parameters:<ul><li><code>circle_background_color</code> must be in hex format.</li><li>Either <code>url</code> or <code>image_asset_id</code> needs to be provided.</li><li>Either <code>audio_url</code> or <code>audio_asset_id</code> needs to be provided.</li></ul></li></ul>",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {}
                },
                "examples": {
                  "Color Format Requirement": {
                    "summary": "Color Format Requirement",
                    "value": {
                      "data": null,
                      "error": {
                        "code": "invalid_parameter",
                        "message": "video_inputs.0.character.avatar.circle_background_color is invalid: String should match pattern '^#[0-9a-fA-F]{6}$'"
                      }
                    }
                  },
                  "Audio Input Requirement": {
                    "summary": "Audio Input Requirement",
                    "value": {
                      "data": null,
                      "error": {
                        "code": "invalid_parameter",
                        "message": "video_inputs.0.voice.audio is invalid: Value error, either audio_url or audio_asset_id needs to be provided"
                      }
                    }
                  },
                  "Image Input Requirement": {
                    "summary": "Image Input Requirement",
                    "value": {
                      "data": null,
                      "error": {
                        "code": "invalid_parameter",
                        "message": "video_inputs.0.background.image is invalid: Value error, either url or image_asset_id needs to be provided"
                      }
                    }
                  }
                }
              }
            }
          }
        },
        "deprecated": false
      }
    }
  },
  "x-readme": {
    "headers": [],
    "explorer-enabled": true,
    "proxy-enabled": true
  },
  "x-readme-fauxas": true
}
```