# Running Flux 2 Locally
# Environment check: print the GPU status, the PyTorch/CUDA versions and
# whether bitsandbytes can be imported.
#
# One-time dependency install (notebook shell command, left commented out):
# !pip install -q diffusers transformers accelerate bitsandbytes huggingface_hub protobuf sentencepiece hf_transfer
import shutil
import subprocess

import torch

# Plain-Python equivalent of the notebook shell escape `!nvidia-smi`, so this
# section runs both as a notebook cell and as a regular script. The output is
# captured and printed so that it appears in notebook cell output too.
if shutil.which("nvidia-smi"):
    smi = subprocess.run(
        ["nvidia-smi"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        check=False,
    )
    print(smi.stdout)
else:
    print("nvidia-smi not found on PATH.")

print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Version: {torch.version.cuda}")
print(f"Is CUDA available? {torch.cuda.is_available()}")
# get_device_name(0) raises when no CUDA device is visible, so only query it
# when CUDA is actually available.
if torch.cuda.is_available():
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
else:
    print("GPU Name: N/A (no CUDA device available)")

# Check that bitsandbytes (used for 4-bit quantization) is importable.
# The import lives inside the `try` so that a missing install is reported by
# the handler instead of aborting the script before the handler is reached.
# NOTE: this only verifies the import; it does not exercise a 4-bit kernel.
try:
    import bitsandbytes as bnb
except ImportError:
    bnb = None  # keep the name defined for any later code that checks it
    print("BitsAndBytes not installed correctly.")
else:
    print(f"BitsAndBytes Version: {bnb.__version__}")
    print("4-bit quantization check: PASSED")
import torch
from transformers import Mistral3ForConditionalGeneration
from diffusers import Flux2Pipeline, Flux2Transformer2DModel
# Model setup: load the transformer and text encoder from the repository
# named by `repo_id`, then build the Flux 2 pipeline around them.

# Repository to load from, the device used for the seeded random generators
# in the generation calls, and the dtype handed to every from_pretrained call.
repo_id = "diffusers/FLUX.2-dev-bnb-4bit"
device = "cuda:0"
torch_dtype = torch.bfloat16

# Both components are loaded with device_map="cpu".
transformer = Flux2Transformer2DModel.from_pretrained(
    repo_id, subfolder="transformer", torch_dtype=torch_dtype, device_map="cpu"
)
# NOTE: this loader is called with `dtype=` while the diffusers loaders are
# called with `torch_dtype=`; the keyword names are kept exactly as written.
text_encoder = Mistral3ForConditionalGeneration.from_pretrained(
    repo_id, subfolder="text_encoder", dtype=torch_dtype, device_map="cpu"
)

# Pass the pre-loaded components in so the pipeline uses them as given.
pipe = Flux2Pipeline.from_pretrained(
    repo_id, transformer=transformer, text_encoder=text_encoder, torch_dtype=torch_dtype
)
# Text-to-image generation with a plain-text prompt, then save to disk.

# Turn on model CPU offload before the first inference call.
# NOTE(review): presumably this keeps GPU memory usage down by moving
# components on and off the GPU as needed - confirm against the diffusers docs.
pipe.enable_model_cpu_offload()

prompt = (
    "Realistic macro photo of a ladybug perched on the edge of a dew-covered rose petal, "
    "early morning mist, soft natural light, shallow depth of field, crisp detail, creamy bokeh."
)

# A generator seeded with 42 is passed in; the first image of the result is kept.
image = pipe(
    prompt=prompt,
    generator=torch.Generator(device=device).manual_seed(42),
    num_inference_steps=28,
    guidance_scale=4,
).images[0]

# Bare expression: renders the image when it is the last line of a notebook
# cell; it has no effect when run as a script.
image

# Save the generated image.
output_path = "flux2_local_full.png"
image.save(output_path)
print(f"Image saved to {output_path}")
# Text-to-image generation with a structured prompt: the prompt is a JSON
# document passed to the pipeline as a single string. The string content is
# runtime text and is kept exactly as written.
advanced_prompt = """
{
"scene": "New Year's Eve night on a rooftop overlooking a glowing city skyline",
"subjects": [
{
"description": "Group of close friends including men and women in winter clothing, natural facial proportions, diverse appearances",
"position": "center and slightly spread across the frame",
"action": "standing together in a loose circle, some smiling softly, others quietly reflective, sharing the moment before midnight"
}
],
"style": "Cinematic semi-realistic illustration with grounded realism, subtle painterly softness",
"color_palette": ["#0B132B", "#1C2541", "#EAEAEA", "#F4D35E"],
"lighting": "Soft moonlight as ambient key light, warm glow from sparklers and nearby string lights illuminating faces, gentle contrast",
"mood": "Warm, intimate, reflective, hopeful",
"background": "Out-of-focus city skyline with distant fireworks softly lighting the sky, minimal visual noise",
"composition": "Wide medium shot, balanced framing, friends forming a natural arc, negative space above for sky and fireworks",
"camera": {
"angle": "eye-level",
"lens": "50mm cinematic look",
"depth_of_field": "sharp focus on group, gentle background blur"
},
"details": [
"subtle breath vapor in cold air",
"soft fabric textures on coats and scarves",
"sparklers emitting warm golden particles",
"natural body language and expressions",
"no exaggerated poses or faces"
],
"negative_prompt": [
"cartoon style",
"emote proportions",
"chibi",
"oversized heads",
"text",
"watermark",
"logo",
"overcrowded scene",
"harsh neon lighting",
"over-saturation",
"extra limbs",
"distorted faces"
]
}
"""

# Same seed (42) and guidance scale as the plain-text run, with 50 steps.
image_2 = pipe(
    prompt=advanced_prompt,
    generator=torch.Generator(device=device).manual_seed(42),
    num_inference_steps=50,
    guidance_scale=4,
).images[0]

# Bare expression: renders the image when it is the last line of a notebook
# cell; it has no effect when run as a script.
image_2

# Helper used by the reference-image generation that follows.
from diffusers.utils import load_image
# Reference-image generation: the pipeline receives a text prompt together
# with an input image loaded from a URL.
prompt = "".join(
    (
        "Using the provided reference image, generate a realistic photograph of a woman with a similar face, ",
        "standing at night and looking up at the sky with stars with a subtle, sad expression. ",
        "Natural skin tones, soft moonlight, gentle shadows, shallow depth of field, ",
        "with blurred city lights in the background.",
    )
)

# Load the reference photo from its URL.
reference_url = "https://images.pexels.com/photos/9317190/pexels-photo-9317190.jpeg"
indian_woman = load_image(reference_url)

# Generator seeded with 42, created on the configured device.
seeded_generator = torch.Generator(device=device).manual_seed(42)

pipeline_output = pipe(
    prompt=prompt,
    image=[indian_woman],  # optional multi-image input
    generator=seeded_generator,
    num_inference_steps=28,
    guidance_scale=4,
)
# Keep the first image of the pipeline output.
image_3 = pipeline_output.images[0]