Bases: CachePolicy
LRU cache policy backed by a single OrderedDict.
Source code in vllm/v1/kv_offload/cpu/policies/lru.py
class LRUCachePolicy(CachePolicy):
    """LRU cache policy backed by a single OrderedDict.

    ``self.blocks`` is kept ordered from least recently used (first) to
    most recently used (last): ``touch`` moves entries to the end and
    ``evict`` scans from the front.
    """

    def __init__(self, cache_capacity: int):
        # cache_capacity unused by LRU but accepted for a uniform constructor
        self.blocks: OrderedDict[OffloadKey, BlockStatus] = OrderedDict()

    def get(self, key: OffloadKey) -> BlockStatus | None:
        """Return the block stored under ``key``, or ``None`` if absent.

        The lookup does not change the recency order.
        """
        return self.blocks.get(key)

    def insert(self, key: OffloadKey, block: BlockStatus) -> None:
        """Store ``block`` under ``key``.

        A new key is appended at the most-recently-used end. Assigning to
        a key that is already present replaces its value but leaves its
        position in the order unchanged.
        """
        self.blocks[key] = block

    def remove(self, key: OffloadKey) -> None:
        """Delete ``key`` from the cache.

        Raises:
            KeyError: If ``key`` is not present.
        """
        del self.blocks[key]

    def touch(self, keys: Iterable[OffloadKey]) -> None:
        """Mark the given keys as recently used.

        Keys that are not in the cache are ignored. The keys are processed
        in reverse, so the first key of ``keys`` ends up as the most
        recently used entry and the last key as the least recent of them.
        """
        # Materialize first: a generic iterable may not support reversed().
        for key in reversed(list(keys)):
            if key in self.blocks:
                self.blocks.move_to_end(key)

    def evict(
        self, n: int, protected: set[OffloadKey]
    ) -> list[tuple[OffloadKey, BlockStatus]] | None:
        """Evict ``n`` blocks, starting from the least recently used.

        Only blocks with ``ref_cnt == 0`` whose key is not in ``protected``
        are eligible. The operation is all-or-nothing.

        Args:
            n: Number of blocks to evict. Values ``<= 0`` evict nothing.
            protected: Keys that must not be evicted.

        Returns:
            The evicted ``(key, block)`` pairs in eviction order, an empty
            list when ``n <= 0``, or ``None`` (with the cache left
            untouched) when fewer than ``n`` blocks are eligible.
        """
        # Guard on <= 0, not == 0: with a negative n the length checks
        # below would never trigger and every eligible block would be
        # evicted.
        if n <= 0:
            return []

        candidates: list[tuple[OffloadKey, BlockStatus]] = []
        # OrderedDict iterates oldest-first, i.e. least recently used first.
        for key, block in self.blocks.items():
            if block.ref_cnt == 0 and key not in protected:
                candidates.append((key, block))
                if len(candidates) == n:
                    break

        if len(candidates) < n:
            return None

        # Delete only after the scan so the dict is never mutated while
        # being iterated, and so a failed eviction changes nothing.
        for key, _ in candidates:
            del self.blocks[key]
        return candidates