SDXL ControlNet Depth

An image generation pipeline built on Stable Diffusion XL that estimates depth from a provided control image and uses it to guide text-to-image inference.

Deploy now

‌

Model details

Developed by
Stability AI
Model family
Stable Diffusion
Use case
image generation
Version
XL 1.0
Variant
ControlNet - Depth
Size
XL
Hardware
A10G
License
Stability membership
Readme
View

View repository

Example usage

The model accepts a few main inputs:

prompt: This is text describing the image you want to generate. The output images tend to get better as you add more descriptive words to the prompt.
image: An image that must be provided by the user as a base64 string. The ControlNet uses this input image to control the output from Stable Diffusion XL.

The output JSON object contains a key called result, whose value is the generated image as a base64 string.

Input
import requests
import os
import base64
from PIL import Image
from io import BytesIO

# Data-URI header that can precede a base64-encoded PNG payload
BASE64_PREAMBLE = "data:image/png;base64,"

# Replace the empty string with your model id below
model_id = ""

# The API key is read from the environment; a KeyError is raised if
# BASETEN_API_KEY is not set.
baseten_api_key = os.environ["BASETEN_API_KEY"]

def b64_to_pil(b64_str):
    """Convert a base64 string to a PIL image.

    Args:
        b64_str: Base64-encoded image bytes, optionally prefixed with a
            data-URI header such as "data:image/png;base64," or
            "data:image/jpeg;base64,".

    Returns:
        The image opened by ``Image.open`` from the decoded bytes.
    """
    # Strip any "data:<mime>;base64," header rather than only the PNG one:
    # a header left in place would be decoded as junk bytes ahead of the
    # real image data. The base64 alphabet has no comma, so the first
    # comma always ends the header.
    if b64_str.startswith("data:"):
        b64_str = b64_str.partition(",")[2]
    return Image.open(BytesIO(base64.b64decode(b64_str)))
    
def pil_to_b64(pil_img):
    """Serialize an image as PNG and return the bytes as a base64 string.

    The image is written to an in-memory buffer via ``pil_img.save``; the
    returned text is plain base64 with no "data:" header.
    """
    with BytesIO() as png_buffer:
        pil_img.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()
    return base64.b64encode(png_bytes).decode("utf-8")

# Build the request payload. Opening the control image in a context manager
# closes the underlying file once it has been re-encoded as base64 PNG.
with Image.open("/path/to/image/input_image_1.jpg") as control_img:
    data = {
        "prompt": "a picture of a racoon",
        "image": pil_to_b64(control_img),
    }

# Call model endpoint
res = requests.post(
    f"https://model-{model_id}.api.baseten.co/production/predict",
    headers={"Authorization": f"Api-Key {baseten_api_key}"},
    json=data,
)
# Fail loudly on HTTP errors (e.g. bad API key or model id) instead of
# trying to interpret an error body as an image.
res.raise_for_status()

# Get output image
res = res.json()
output = res.get("result")
if output is None:
    raise RuntimeError(
        f"Model response has no 'result' key; got keys: {list(res)}"
    )

# Convert the base64 model output to an image
img = b64_to_pil(output)
img.save("output_image.png")
# NOTE: `open` is the macOS command for launching the default viewer;
# on other platforms, open output_image.png manually.
os.system("open output_image.png")
JSON output
{
    "result": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBg..."
}
Preview

Here is another example using a different prompt and image.

Input
import requests
import os
import base64
from PIL import Image
from io import BytesIO

# Data-URI header that can precede a base64-encoded PNG payload
BASE64_PREAMBLE = "data:image/png;base64,"

# Replace the empty string with your model id below
model_id = ""

# The API key is read from the environment; a KeyError is raised if
# BASETEN_API_KEY is not set.
baseten_api_key = os.environ["BASETEN_API_KEY"]

def b64_to_pil(b64_str):
    """Convert a base64 string to a PIL image.

    Args:
        b64_str: Base64-encoded image bytes, optionally prefixed with a
            data-URI header such as "data:image/png;base64," or
            "data:image/jpeg;base64,".

    Returns:
        The image opened by ``Image.open`` from the decoded bytes.
    """
    # Strip any "data:<mime>;base64," header rather than only the PNG one:
    # a header left in place would be decoded as junk bytes ahead of the
    # real image data. The base64 alphabet has no comma, so the first
    # comma always ends the header.
    if b64_str.startswith("data:"):
        b64_str = b64_str.partition(",")[2]
    return Image.open(BytesIO(base64.b64decode(b64_str)))
    
def pil_to_b64(pil_img):
    """Serialize an image as PNG and return the bytes as a base64 string.

    The image is written to an in-memory buffer via ``pil_img.save``; the
    returned text is plain base64 with no "data:" header.
    """
    with BytesIO() as png_buffer:
        pil_img.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()
    return base64.b64encode(png_bytes).decode("utf-8")

# Build the request payload. Opening the control image in a context manager
# closes the underlying file once it has been re-encoded as base64 PNG.
with Image.open("/path/to/image/input_image_2.jpg") as control_img:
    data = {
        "prompt": "large bed, abstract painting on the wall, fluffy rug on the floor, ambient lighting, extremely detailed",
        "image": pil_to_b64(control_img),
    }

# Call model endpoint
res = requests.post(
    f"https://model-{model_id}.api.baseten.co/production/predict",
    headers={"Authorization": f"Api-Key {baseten_api_key}"},
    json=data,
)
# Fail loudly on HTTP errors (e.g. bad API key or model id) instead of
# trying to interpret an error body as an image.
res.raise_for_status()

# Get output image
res = res.json()
output = res.get("result")
if output is None:
    raise RuntimeError(
        f"Model response has no 'result' key; got keys: {list(res)}"
    )

# Convert the base64 model output to an image
img = b64_to_pil(output)
img.save("output_image.png")
# NOTE: `open` is the macOS command for launching the default viewer;
# on other platforms, open output_image.png manually.
os.system("open output_image.png")
JSON output
{
    "result": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBg..."
}
Preview