
bump xformers and clean up leftover code

AlpinDale committed 1 year ago
commit c687430ce7
3 changed files with 7 additions and 6 deletions
  1. aphrodite/modeling/layers/attention.py (+3 -2)
  2. aphrodite/processing/scheduler.py (+3 -3)
  3. requirements.txt (+1 -1)

+ 3 - 2
aphrodite/modeling/layers/attention.py

@@ -339,11 +339,12 @@ class PagedAttentionWithALiBi(PagedAttention):
             # be sliced from a tensor whose length is a multiple of 8.
             padded_len = (prompt_len + 7) // 8 * 8
             bias = torch.empty(
+                1, # batch_size
                 self.num_heads,
-                padded_len,
+                prompt_len,
                 padded_len,
                 device=self.alibi_slopes.device,
-            )[:, :prompt_len, :prompt_len].copy_(bias)
+            )[:, :, :, :prompt_len].copy_(bias)
             bias.mul_(self.alibi_slopes[:, None, None])
             attn_bias = LowerTriangularMaskWithTensorBias(bias)
             input_metadata.attn_bias.append(attn_bias)
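
The added leading dimension tracks xformers' newer attention-bias interface, which (as of the 0.0.21 line this commit pins) takes a 4-D (batch, heads, query_len, key_len) tensor whose last dimension, per the comment in the hunk, must be sliced from storage padded to a multiple of 8. Below is a minimal, self-contained sketch of the resulting shape logic; the num_heads / prompt_len values are hypothetical stand-ins for the module's real state, and this mirrors the diff rather than being the repository's code.

import torch

num_heads, prompt_len = 8, 13
alibi_slopes = torch.rand(num_heads)

# Relative-position bias: bias[i, j] = j - i.
positions = torch.arange(prompt_len)
bias = positions[None, :] - positions[:, None]

# xformers requires the bias to be sliced from storage whose last
# dimension is a multiple of 8, hence the padded allocation plus a
# narrow view over the first prompt_len columns.
padded_len = (prompt_len + 7) // 8 * 8
bias_4d = torch.empty(
    1,           # batch dimension expected by the newer xformers bias API
    num_heads,
    prompt_len,  # was padded_len before this commit
    padded_len,
)[:, :, :, :prompt_len].copy_(bias)

# Scale each head by its ALiBi slope; (num_heads, 1, 1) broadcasts
# across the batch, query, and key dimensions.
bias_4d.mul_(alibi_slopes[:, None, None])
print(bias_4d.shape)  # torch.Size([1, 8, 13, 13]), a strided view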

+ 3 - 3
aphrodite/processing/scheduler.py

@@ -374,9 +374,9 @@ class Scheduler:
         seq_group: SequenceGroup,
         blocks_to_swap_out: Dict[int, int],
     ) -> None:
-        seqs = seq_group.get_seqs(status=SequenceStatus.RUNNING)
-        for seq in seqs:
-            seq.status = SequenceStatus.SWAPPED
+        # seqs = seq_group.get_seqs(status=SequenceStatus.RUNNING)
+        # for seq in seqs:
+        #     seq.status = SequenceStatus.SWAPPED
         self._swap_out(seq_group, blocks_to_swap_out)
         self.swapped.append(seq_group)
 
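The deleted status flip is the "leftover code" from the commit message: it duplicated work already done inside the swap-out path. Here is a simplified, runnable sketch of that redundancy, assuming (as in the upstream vLLM scheduler this code derives from) that _swap_out itself transitions RUNNING sequences to SWAPPED; the classes below are minimal stand-ins, not the repository's real types.

from enum import Enum, auto
from typing import Dict, List


class SequenceStatus(Enum):
    RUNNING = auto()
    SWAPPED = auto()


class Sequence:
    def __init__(self) -> None:
        self.status = SequenceStatus.RUNNING


class SequenceGroup:
    def __init__(self, seqs: List[Sequence]) -> None:
        self.seqs = seqs

    def get_seqs(self, status: SequenceStatus) -> List[Sequence]:
        return [s for s in self.seqs if s.status == status]


def _swap_out(seq_group: SequenceGroup,
              blocks_to_swap_out: Dict[int, int]) -> None:
    # Block-table bookkeeping elided; the point is that the status
    # transition already happens here ...
    for seq in seq_group.get_seqs(status=SequenceStatus.RUNNING):
        seq.status = SequenceStatus.SWAPPED


group = SequenceGroup([Sequence(), Sequence()])
_swap_out(group, {})
# ... so repeating the same loop in the caller was a no-op.
assert all(s.status == SequenceStatus.SWAPPED for s in group.seqs)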

+ 1 - 1
requirements.txt

@@ -7,7 +7,7 @@ torch >= 2.0.0
 transformers >= 4.31.0
 uvicorn
 openai # for fastapi's openai proxy emulation
-xformers >= 0.0.19
+xformers >= 0.0.21
 mypy
 pytest
 fschat >= 0.2.23
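
Since the floor moved from 0.0.19 to 0.0.21, a quick way to confirm an existing environment satisfies the new pin is a version assertion like the one below; this is a standalone check using the "packaging" distribution from PyPI, not part of the repository.

from packaging import version

import xformers

# Fail fast if the installed wheel is older than the pinned floor.
assert version.parse(xformers.__version__) >= version.parse("0.0.21"), (
    f"xformers {xformers.__version__} is older than the required 0.0.21")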