Running Flux 2 Locally

!nvidia-smi

# !pip install -q diffusers transformers accelerate bitsandbytes huggingface_hub protobuf sentencepiece hf_transfer

import torch

# Report the PyTorch / CUDA environment before any model is loaded.
cuda_available = torch.cuda.is_available()
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Version: {torch.version.cuda}")
print(f"Is CUDA available? {cuda_available}")
# get_device_name() raises when no CUDA device is visible, so only query it
# when CUDA was actually detected.
if cuda_available:
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
else:
    print("GPU Name: no CUDA device detected")

# Check that bitsandbytes (used by the 4-bit quantized checkpoint) is importable.
# The import has to happen inside the try block: if it runs earlier, a missing
# package aborts the cell before the ImportError handler can report it.
# NOTE: this only proves the package imports; it does not run a 4-bit kernel.
try:
    import bitsandbytes as bnb
except ImportError:
    print("BitsAndBytes not installed correctly.")
else:
    print(f"BitsAndBytes Version: {bnb.__version__}")
    print("4-bit quantization check: PASSED")

import torch
from transformers import Mistral3ForConditionalGeneration
from diffusers import Flux2Pipeline, Flux2Transformer2DModel

# Checkpoint and placement settings shared by every component below.
repo_id = "diffusers/FLUX.2-dev-bnb-4bit"
device = "cuda:0"
torch_dtype = torch.bfloat16

# Diffusion transformer, loaded from the "transformer" subfolder onto the CPU.
transformer = Flux2Transformer2DModel.from_pretrained(
    repo_id,
    subfolder="transformer",
    torch_dtype=torch_dtype,
    device_map="cpu",
)

# Text encoder, loaded from the "text_encoder" subfolder onto the CPU.
# Note the keyword here is `dtype`, not `torch_dtype` as in the diffusers calls.
text_encoder = Mistral3ForConditionalGeneration.from_pretrained(
    repo_id,
    subfolder="text_encoder",
    dtype=torch_dtype,
    device_map="cpu",
)

# Assemble the pipeline around the two pre-loaded components; the remaining
# components are fetched from the same repository.
pipe = Flux2Pipeline.from_pretrained(
    repo_id,
    transformer=transformer,
    text_encoder=text_encoder,
    torch_dtype=torch_dtype,
)

# Turn on diffusers' model-level CPU offloading for this pipeline.
pipe.enable_model_cpu_offload()

# Text-to-image run with a plain natural-language prompt.
prompt = (
    "Realistic macro photo of a ladybug perched on the edge of a dew-covered rose petal, "
    "early morning mist, soft natural light, shallow depth of field, crisp detail, creamy bokeh."
)

# Fresh generator seeded with 42 for this call.
generator = torch.Generator(device=device).manual_seed(42)
output = pipe(
    prompt=prompt,
    generator=generator,
    num_inference_steps=28,
    guidance_scale=4,
)
image = output.images[0]

# Bare expression: shows the image when this is the last line of a notebook cell.
image

# Save the generated image to disk.
output_path = "flux2_local_full.png"
image.save(output_path)
print(f"Image saved to {output_path}")

# Structured prompt written as a JSON document. It is kept as a plain string
# and is not parsed anywhere in the visible code. The newline right after the
# opening quotes and the one before the closing quotes are part of the string.
# The "negative_prompt" list is just another key inside this text.
advanced_prompt = """
{
  "scene": "New Year's Eve night on a rooftop overlooking a glowing city skyline",
  "subjects": [
    {
      "description": "Group of close friends including men and women in winter clothing, natural facial proportions, diverse appearances",
      "position": "center and slightly spread across the frame",
      "action": "standing together in a loose circle, some smiling softly, others quietly reflective, sharing the moment before midnight"
    }
  ],
  "style": "Cinematic semi-realistic illustration with grounded realism, subtle painterly softness",
  "color_palette": ["#0B132B", "#1C2541", "#EAEAEA", "#F4D35E"],
  "lighting": "Soft moonlight as ambient key light, warm glow from sparklers and nearby string lights illuminating faces, gentle contrast",
  "mood": "Warm, intimate, reflective, hopeful",
  "background": "Out-of-focus city skyline with distant fireworks softly lighting the sky, minimal visual noise",
  "composition": "Wide medium shot, balanced framing, friends forming a natural arc, negative space above for sky and fireworks",
  "camera": {
    "angle": "eye-level",
    "lens": "50mm cinematic look",
    "depth_of_field": "sharp focus on group, gentle background blur"
  },
  "details": [
    "subtle breath vapor in cold air",
    "soft fabric textures on coats and scarves",
    "sparklers emitting warm golden particles",
    "natural body language and expressions",
    "no exaggerated poses or faces"
  ],
  "negative_prompt": [
    "cartoon style",
    "emote proportions",
    "chibi",
    "oversized heads",
    "text",
    "watermark",
    "logo",
    "overcrowded scene",
    "harsh neon lighting",
    "over-saturation",
    "extra limbs",
    "distorted faces"
  ]
}
"""

# Second run: the JSON-formatted prompt is passed as the prompt text, with a
# fresh generator seeded with 42 and 50 inference steps.
generator = torch.Generator(device=device).manual_seed(42)
output_2 = pipe(
    prompt=advanced_prompt,
    generator=generator,
    num_inference_steps=50,
    guidance_scale=4,
)
image_2 = output_2.images[0]

# Bare expression: shows the image when this is the last line of a notebook cell.
image_2

from diffusers.utils import load_image

# Third run: text prompt plus a reference image.
prompt = (
    "Using the provided reference image, generate a realistic photograph of a woman with a similar face, "
    "standing at night and looking up at the sky with stars with a subtle, sad expression. "
    "Natural skin tones, soft moonlight, gentle shadows, shallow depth of field, "
    "with blurred city lights in the background."
)

# Reference photo loaded from a remote URL.
indian_woman = load_image("https://images.pexels.com/photos/9317190/pexels-photo-9317190.jpeg")

# Fresh generator seeded with 42 for this call.
generator = torch.Generator(device=device).manual_seed(42)
output_3 = pipe(
    prompt=prompt,
    image=[indian_woman],  # passed as a list: optional multi-image input
    generator=generator,
    num_inference_steps=28,
    guidance_scale=4,
)
image_3 = output_3.images[0]

‌
‌
‌