# imagenetdepth_vqgan.yaml
#
# Training configuration with two top-level stanzas:
#   model - instantiates `taming.models.vqgan.VQModel` with the params below;
#           it reads the `depth` image key and uses 1 input / 1 output channel.
#   data  - instantiates `main.DataModuleFromConfig` with a train and a
#           validation dataset, both configured with size 256.
#
# NOTE(review): the nesting below was reconstructed from a flattened listing
# (each `target` is paired with the `params` that follows it); verify it
# against the constructors of the referenced classes.

model:
  base_learning_rate: 4.5e-6
  target: taming.models.vqgan.VQModel
  params:
    embed_dim: 256
    n_embed: 1024
    image_key: depth
    ddconfig:
      double_z: false
      z_channels: 256
      resolution: 256
      in_channels: 1
      out_ch: 1
      ch: 128
      ch_mult: [1, 1, 2, 2, 4]  # num_down = len(ch_mult)-1
      num_res_blocks: 2
      attn_resolutions: [16]
      dropout: 0.0

    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 1
        disc_start: 50001
        disc_weight: 0.75
        codebook_weight: 1.0

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 3
    num_workers: 8
    train:
      target: taming.data.imagenet.ImageNetTrainWithDepth
      params:
        size: 256
    validation:
      target: taming.data.imagenet.ImageNetValidationWithDepth
      params:
        size: 256