---
# Training configuration for a VQ model operating on single-channel inputs
# read from the "depth" key of each batch (see image_key / in_channels below).
#
# NOTE(review): the nesting here was reconstructed from a copy of this file
# whose indentation had been lost; it follows the `target` + `params`
# pairing used by every section. Confirm against the upstream config.

model:
  base_learning_rate: 4.5e-6
  target: taming.models.vqgan.VQModel
  params:
    embed_dim: 256
    n_embed: 1024
    image_key: depth
    # Settings nested under ddconfig; z_channels matches embed_dim above.
    ddconfig:
      double_z: false
      z_channels: 256
      resolution: 256
      in_channels: 1
      out_ch: 1
      ch: 128
      ch_mult: [1, 1, 2, 2, 4]  # num_down = len(ch_mult)-1
      num_res_blocks: 2
      attn_resolutions: [16]
      dropout: 0.0

    # Loss module, instantiated from `target` with `params`.
    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        # Matches ddconfig.in_channels / out_ch (single channel).
        disc_in_channels: 1
        # presumably the training step at which the discriminator is
        # enabled - TODO confirm against the loss implementation
        disc_start: 50001
        disc_weight: 0.75
        codebook_weight: 1.0

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 3
    num_workers: 8
    # Train and validation datasets both use size 256, the same value as
    # ddconfig.resolution.
    train:
      target: taming.data.imagenet.ImageNetTrainWithDepth
      params:
        size: 256
    validation:
      target: taming.data.imagenet.ImageNetValidationWithDepth
      params:
        size: 256