123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125 |
- {
- "model_type": "diffusion_cond",
- "sample_size": 2097152,
- "sample_rate": 44100,
- "audio_channels": 2,
- "model": {
- "pretransform": {
- "type": "autoencoder",
- "iterate_batch": true,
- "config": {
- "encoder": {
- "type": "oobleck",
- "requires_grad": false,
- "config": {
- "in_channels": 2,
- "channels": 128,
- "c_mults": [1, 2, 4, 8, 16],
- "strides": [2, 4, 4, 8, 8],
- "latent_dim": 128,
- "use_snake": true
- }
- },
- "decoder": {
- "type": "oobleck",
- "config": {
- "out_channels": 2,
- "channels": 128,
- "c_mults": [1, 2, 4, 8, 16],
- "strides": [2, 4, 4, 8, 8],
- "latent_dim": 64,
- "use_snake": true,
- "final_tanh": false
- }
- },
- "bottleneck": {
- "type": "vae"
- },
- "latent_dim": 64,
- "downsampling_ratio": 2048,
- "io_channels": 2
- }
- },
- "conditioning": {
- "configs": [
- {
- "id": "prompt",
- "type": "t5",
- "config": {
- "t5_model_name": "t5-base",
- "max_length": 128
- }
- },
- {
- "id": "seconds_start",
- "type": "number",
- "config": {
- "min_val": 0,
- "max_val": 512
- }
- },
- {
- "id": "seconds_total",
- "type": "number",
- "config": {
- "min_val": 0,
- "max_val": 512
- }
- }
- ],
- "cond_dim": 768
- },
- "diffusion": {
- "cross_attention_cond_ids": ["prompt", "seconds_start", "seconds_total"],
- "global_cond_ids": ["seconds_start", "seconds_total"],
- "type": "dit",
- "config": {
- "io_channels": 64,
- "embed_dim": 1536,
- "depth": 24,
- "num_heads": 24,
- "cond_token_dim": 768,
- "global_cond_dim": 1536,
- "project_cond_tokens": false,
- "transformer_type": "continuous_transformer"
- }
- },
- "io_channels": 64
- },
- "training": {
- "use_ema": true,
- "log_loss_info": false,
- "optimizer_configs": {
- "diffusion": {
- "optimizer": {
- "type": "AdamW",
- "config": {
- "lr": 5e-5,
- "betas": [0.9, 0.999],
- "weight_decay": 1e-3
- }
- },
- "scheduler": {
- "type": "InverseLR",
- "config": {
- "inv_gamma": 1000000,
- "power": 0.5,
- "warmup": 0.99
- }
- }
- }
- },
- "demo": {
- "demo_every": 2000,
- "demo_steps": 250,
- "num_demos": 4,
- "demo_cond": [
- {"prompt": "Amen break 174 BPM", "seconds_start": 0, "seconds_total": 12},
- {"prompt": "A beautiful orchestral symphony, classical music", "seconds_start": 0, "seconds_total": 160},
- {"prompt": "Chill hip-hop beat, chillhop", "seconds_start": 0, "seconds_total": 190},
- {"prompt": "A pop song about love and loss", "seconds_start": 0, "seconds_total": 180}
- ],
- "demo_cfg_scales": [3, 6, 9]
- }
- }
- }
|