# default.py (2.8 KB in the original listing)

  1. from yacs.config import CfgNode as CN
  2. _C = CN()
  3. _C.TAG = "style_id_emotion"
  4. _C.DECODER_TYPE = "DisentangleDecoder"
  5. _C.CONTENT_ENCODER_TYPE = "ContentW2VEncoder"
  6. _C.STYLE_ENCODER_TYPE = "StyleEncoder"
  7. _C.DIFFNET_TYPE = "DiffusionNet"
  8. _C.WIN_SIZE = 5
  9. _C.D_MODEL = 256
  10. _C.DATASET = CN()
  11. _C.DATASET.FACE3D_DIM = 64
  12. _C.DATASET.NUM_FRAMES = 64
  13. _C.DATASET.STYLE_MAX_LEN = 256
  14. _C.TRAIN = CN()
  15. _C.TRAIN.FACE3D_LATENT = CN()
  16. _C.TRAIN.FACE3D_LATENT.TYPE = "face3d"
  17. _C.DIFFUSION = CN()
  18. _C.DIFFUSION.PREDICT_WHAT = "x0" # noise | x0
  19. _C.DIFFUSION.SCHEDULE = CN()
  20. _C.DIFFUSION.SCHEDULE.NUM_STEPS = 1000
  21. _C.DIFFUSION.SCHEDULE.BETA_1 = 1e-4
  22. _C.DIFFUSION.SCHEDULE.BETA_T = 0.02
  23. _C.DIFFUSION.SCHEDULE.MODE = "linear"
  24. _C.CONTENT_ENCODER = CN()
  25. _C.CONTENT_ENCODER.d_model = _C.D_MODEL
  26. _C.CONTENT_ENCODER.nhead = 8
  27. _C.CONTENT_ENCODER.num_encoder_layers = 3
  28. _C.CONTENT_ENCODER.dim_feedforward = 4 * _C.D_MODEL
  29. _C.CONTENT_ENCODER.dropout = 0.1
  30. _C.CONTENT_ENCODER.activation = "relu"
  31. _C.CONTENT_ENCODER.normalize_before = False
  32. _C.CONTENT_ENCODER.pos_embed_len = 2 * _C.WIN_SIZE + 1
  33. _C.STYLE_ENCODER = CN()
  34. _C.STYLE_ENCODER.d_model = _C.D_MODEL
  35. _C.STYLE_ENCODER.nhead = 8
  36. _C.STYLE_ENCODER.num_encoder_layers = 3
  37. _C.STYLE_ENCODER.dim_feedforward = 4 * _C.D_MODEL
  38. _C.STYLE_ENCODER.dropout = 0.1
  39. _C.STYLE_ENCODER.activation = "relu"
  40. _C.STYLE_ENCODER.normalize_before = False
  41. _C.STYLE_ENCODER.pos_embed_len = _C.DATASET.STYLE_MAX_LEN
  42. _C.STYLE_ENCODER.aggregate_method = (
  43. "self_attention_pooling" # average | self_attention_pooling
  44. )
  45. # _C.STYLE_ENCODER.input_dim = _C.DATASET.FACE3D_DIM
  46. _C.DECODER = CN()
  47. _C.DECODER.d_model = _C.D_MODEL
  48. _C.DECODER.nhead = 8
  49. _C.DECODER.num_decoder_layers = 3
  50. _C.DECODER.dim_feedforward = 4 * _C.D_MODEL
  51. _C.DECODER.dropout = 0.1
  52. _C.DECODER.activation = "relu"
  53. _C.DECODER.normalize_before = False
  54. _C.DECODER.return_intermediate_dec = False
  55. _C.DECODER.pos_embed_len = 2 * _C.WIN_SIZE + 1
  56. _C.DECODER.network_type = "TransformerDecoder"
  57. _C.DECODER.dynamic_K = None
  58. _C.DECODER.dynamic_ratio = None
  59. # _C.DECODER.output_dim = _C.DATASET.FACE3D_DIM
  60. # LSFM basis:
  61. # _C.DECODER.upper_face3d_indices = tuple(list(range(19)) + list(range(46, 51)))
  62. # _C.DECODER.lower_face3d_indices = tuple(range(19, 46))
  63. # BFM basis:
  64. # fmt: off
  65. _C.DECODER.upper_face3d_indices = [6, 8, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]
  66. # fmt: on
  67. _C.DECODER.lower_face3d_indices = [0, 1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14]
  68. _C.CF_GUIDANCE = CN()
  69. _C.CF_GUIDANCE.TRAINING = True
  70. _C.CF_GUIDANCE.INFERENCE = True
  71. _C.CF_GUIDANCE.NULL_PROB = 0.1
  72. _C.CF_GUIDANCE.SCALE = 1.0
  73. _C.INFERENCE = CN()
  74. _C.INFERENCE.CHECKPOINT = "checkpoints/denoising_network.pth"
  75. def get_cfg_defaults():
  76. """Get a yacs CfgNode object with default values for my_project."""
  77. return _C.clone()