Memory Systems in AI Agents: Short-term, Long-term, and Entity
Understand how memory transforms AI agents from stateless responders to intelligent systems that learn and remember. Implement all three memory types.
Without memory, every interaction with an AI agent starts from zero. The agent doesn't remember your preferences, past conversations, or the context it built up. It's like having a conversation with someone who has complete amnesia—functional but frustrating.
Memory transforms agents from stateless responders into systems that accumulate knowledge, learn from experience, and maintain coherent long-running relationships with users and tasks.
This guide covers the three types of agent memory and how to implement each.
The Three Types of Memory
AI agent memory mirrors human memory systems:
1. Short-term Memory (Working Memory)
The current conversation and immediate context. Typically implemented through the conversation history sent to the LLM.
2. Long-term Memory (Episodic & Semantic)
Persistent storage of past interactions, learned facts, and accumulated knowledge. Survives across sessions.
3. Entity Memory
Structured knowledge about specific entities—people, projects, concepts—that the agent encounters.
Each type serves different purposes and requires different implementation strategies.
Short-term Memory
Short-term memory is the foundation. It's what allows an agent to maintain coherence within a single conversation.
Basic Implementation
class ShortTermMemory:
def __init__(self, max_messages: int = 50):
self.messages: list[dict] = []
self.max_messages = max_messages
def add(self, role: str, content: str):
self.messages.append({"role": role, "content": content})
self._enforce_limit()
def _enforce_limit(self):
if len(self.messages) > self.max_messages:
# Keep system message if present, trim oldest
if self.messages[0].get("role") == "system":
self.messages = [self.messages[0]] + self.messages[-(self.max_messages-1):]
else:
self.messages = self.messages[-self.max_messages:]
def get_messages(self) -> list[dict]:
return self.messages.copy()
def clear(self):
self.messages = []
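Hooking this into a chat loop is straightforward. A minimal usage sketch, assuming a hypothetical llm.generate() client that accepts a list of chat messages:

memory = ShortTermMemory(max_messages=50)
memory.add("system", "You are a helpful assistant.")

def chat(user_input: str) -> str:
    memory.add("user", user_input)
    # llm.generate is a stand-in for whatever provider client you use
    reply = llm.generate(messages=memory.get_messages())
    memory.add("assistant", reply)
    return reply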
Sliding Window with Summarization
When conversations get long, summarize older content:
class SummarizingMemory:
def __init__(self, llm, window_size: int = 20, summary_threshold: int = 40):
self.llm = llm
self.messages = []
self.summary = ""
self.window_size = window_size
self.summary_threshold = summary_threshold
def add(self, role: str, content: str):
self.messages.append({"role": role, "content": content})
if len(self.messages) > self.summary_threshold:
self._summarize_old_messages()
def _summarize_old_messages(self):
# Take oldest messages beyond window
to_summarize = self.messages[:-self.window_size]
to_keep = self.messages[-self.window_size:]
# Generate summary
summary_prompt = f"""
Summarize this conversation history, preserving key facts, decisions, and context:
Previous summary: {self.summary}
New messages to summarize:
{self._format_messages(to_summarize)}
"""
self.summary = self.llm.generate(summary_prompt)
self.messages = to_keep
def get_context(self) -> str:
context = ""
if self.summary:
context += f"Previous conversation summary:\n{self.summary}\n\n"
context += "Recent messages:\n" + self._format_messages(self.messages)
return context
def _format_messages(self, messages: list) -> str:
return "\n".join([f"{m['role']}: {m['content']}" for m in messages])
Token-Aware Memory
Manage memory based on token count:
import tiktoken
class TokenAwareMemory:
    def __init__(self, max_tokens: int = 4000):
        self.messages = []
        self.max_tokens = max_tokens
        # tiktoken ships no Claude tokenizer, so GPT-4's encoding serves as a
        # rough approximation for token budgeting
        self.encoder = tiktoken.encoding_for_model("gpt-4")
def add(self, role: str, content: str):
self.messages.append({"role": role, "content": content})
self._enforce_token_limit()
def _count_tokens(self, messages: list) -> int:
text = " ".join([m["content"] for m in messages])
return len(self.encoder.encode(text))
    def _enforce_token_limit(self):
        while self._count_tokens(self.messages) > self.max_tokens and len(self.messages) > 1:
            # Remove the oldest non-system message
            for i, msg in enumerate(self.messages):
                if msg.get("role") != "system":
                    self.messages.pop(i)
                    break
            else:
                # Only system messages remain; nothing more to trim
                break
Long-term Memory
Long-term memory persists across sessions, allowing agents to accumulate knowledge over time.
File-Based Implementation
Simple but effective for single-user agents:
import json
from datetime import datetime
from pathlib import Path
from typing import Any
class LongTermMemory:
def __init__(self, storage_path: str = "agent_memory.json"):
self.storage_path = Path(storage_path)
self.memory = self._load()
def _load(self) -> dict:
if self.storage_path.exists():
return json.loads(self.storage_path.read_text())
return {
"facts": [],
"conversations": [],
"preferences": {},
"learnings": []
}
def _save(self):
self.storage_path.write_text(json.dumps(self.memory, indent=2, default=str))
def store_fact(self, fact: str, source: str = None, confidence: float = 1.0):
"""Store a learned fact"""
self.memory["facts"].append({
"fact": fact,
"source": source,
"confidence": confidence,
"timestamp": datetime.now().isoformat()
})
self._save()
def store_conversation_summary(self, summary: str, key_points: list[str] = None):
"""Store a conversation summary"""
self.memory["conversations"].append({
"summary": summary,
"key_points": key_points or [],
"timestamp": datetime.now().isoformat()
})
self._save()
    def store_preference(self, key: str, value: Any):
"""Store a user preference"""
self.memory["preferences"][key] = {
"value": value,
"updated": datetime.now().isoformat()
}
self._save()
def store_learning(self, learning: str, context: str = None):
"""Store a meta-learning about how to be more helpful"""
self.memory["learnings"].append({
"learning": learning,
"context": context,
"timestamp": datetime.now().isoformat()
})
self._save()
def get_relevant_memories(self, query: str, max_items: int = 5) -> list:
"""Retrieve memories relevant to a query"""
# Simple keyword matching - use embeddings in production
relevant = []
query_words = set(query.lower().split())
for fact in self.memory["facts"]:
fact_words = set(fact["fact"].lower().split())
if query_words & fact_words:
relevant.append(("fact", fact["fact"]))
for conv in self.memory["conversations"]:
conv_words = set(conv["summary"].lower().split())
if query_words & conv_words:
relevant.append(("conversation", conv["summary"]))
return relevant[:max_items]
Vector-Based Long-term Memory
For semantic search across memories:
import json
from datetime import datetime
from pathlib import Path
from typing import List, Tuple

import numpy as np
class VectorMemory:
    def __init__(self, embedding_model):
self.embedding_model = embedding_model
self.memories: List[dict] = []
self.embeddings: List[np.ndarray] = []
def add(self, content: str, metadata: dict = None):
"""Add a memory with its embedding"""
embedding = self.embedding_model.embed(content)
self.memories.append({
"content": content,
"metadata": metadata or {},
"timestamp": datetime.now().isoformat()
})
self.embeddings.append(embedding)
def search(self, query: str, top_k: int = 5) -> List[Tuple[dict, float]]:
"""Find memories most similar to query"""
query_embedding = self.embedding_model.embed(query)
# Calculate cosine similarities
similarities = []
for i, emb in enumerate(self.embeddings):
similarity = np.dot(query_embedding, emb) / (
np.linalg.norm(query_embedding) * np.linalg.norm(emb)
)
similarities.append((i, similarity))
# Sort by similarity
similarities.sort(key=lambda x: x[1], reverse=True)
# Return top-k results
results = []
for idx, sim in similarities[:top_k]:
results.append((self.memories[idx], sim))
return results
def save(self, path: str):
"""Persist to disk"""
data = {
"memories": self.memories,
"embeddings": [e.tolist() for e in self.embeddings]
}
Path(path).write_text(json.dumps(data))
def load(self, path: str):
"""Load from disk"""
data = json.loads(Path(path).read_text())
self.memories = data["memories"]
self.embeddings = [np.array(e) for e in data["embeddings"]]
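A quick usage sketch, assuming a hypothetical embedding_model object exposing embed(text) -> np.ndarray (any provider's embedding endpoint can be wrapped to fit):

memory = VectorMemory(embedding_model)
memory.add("User prefers concise answers with code examples",
           metadata={"type": "preference"})
memory.add("The Q3 launch was moved to October", metadata={"type": "fact"})

# Semantic search: matches on meaning, not shared keywords
for mem, score in memory.search("when is the product launching?", top_k=2):
    print(f"{score:.3f}  {mem['content']}")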
Database-Backed Memory
For production systems:
import uuid
from datetime import datetime

from sqlalchemy import create_engine, Column, String, Float, Integer, DateTime, JSON
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class Memory(Base):
    __tablename__ = "memories"
    id = Column(String, primary_key=True)
    user_id = Column(String, index=True)
    memory_type = Column(String)  # fact, conversation, preference, learning
    content = Column(String)
    # "metadata" is a reserved attribute name in SQLAlchemy's declarative API,
    # so the Python attribute needs a different name
    meta = Column("metadata", JSON)
    embedding = Column(JSON)  # Store as JSON array
    confidence = Column(Float, default=1.0)
    created_at = Column(DateTime)
    accessed_at = Column(DateTime)
    access_count = Column(Integer, default=0)
class DatabaseMemory:
def __init__(self, connection_string: str):
self.engine = create_engine(connection_string)
Base.metadata.create_all(self.engine)
self.Session = sessionmaker(bind=self.engine)
def store(self, user_id: str, memory_type: str, content: str,
metadata: dict = None, embedding: list = None):
session = self.Session()
memory = Memory(
id=str(uuid.uuid4()),
user_id=user_id,
memory_type=memory_type,
content=content,
            meta=metadata or {},
embedding=embedding,
created_at=datetime.now(),
accessed_at=datetime.now()
)
session.add(memory)
session.commit()
session.close()
def search(self, user_id: str, query_embedding: list,
memory_type: str = None, top_k: int = 5) -> list:
session = self.Session()
query = session.query(Memory).filter(Memory.user_id == user_id)
if memory_type:
query = query.filter(Memory.memory_type == memory_type)
memories = query.all()
        # Compute similarities in Python; _cosine_similarity is the same formula
        # as in VectorMemory.search above (in production, use pgvector or similar)
results = []
for mem in memories:
if mem.embedding:
sim = self._cosine_similarity(query_embedding, mem.embedding)
results.append((mem, sim))
results.sort(key=lambda x: x[1], reverse=True)
session.close()
return results[:top_k]
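The Python-side loop scans every row, which won't scale. With the pgvector extension, Postgres can rank by similarity itself. A sketch of what that might look like using the pgvector-python package, assuming the extension is installed and the embedding column is migrated from JSON to a native vector type:

from pgvector.sqlalchemy import Vector

# In the model, replace the JSON column with:
#     embedding = Column(Vector(1536))

def search_pgvector(self, user_id: str, query_embedding: list, top_k: int = 5) -> list:
    session = self.Session()
    results = (
        session.query(Memory)
        .filter(Memory.user_id == user_id)
        .order_by(Memory.embedding.cosine_distance(query_embedding))
        .limit(top_k)
        .all()
    )
    session.close()
    return results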
Entity Memory
Entity memory stores structured information about specific entities the agent encounters.
Basic Entity Store
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, List, Any
@dataclass
class Entity:
name: str
entity_type: str # person, project, company, concept
attributes: Dict[str, Any] = field(default_factory=dict)
relationships: List[Dict] = field(default_factory=list)
mentions: List[Dict] = field(default_factory=list)
created_at: str = field(default_factory=lambda: datetime.now().isoformat())
updated_at: str = field(default_factory=lambda: datetime.now().isoformat())
class EntityMemory:
def __init__(self):
self.entities: Dict[str, Entity] = {}
def get_or_create(self, name: str, entity_type: str) -> Entity:
"""Get existing entity or create new one"""
key = f"{entity_type}:{name.lower()}"
if key not in self.entities:
self.entities[key] = Entity(name=name, entity_type=entity_type)
return self.entities[key]
def update_attribute(self, name: str, entity_type: str,
attribute: str, value: Any):
"""Update an entity's attribute"""
entity = self.get_or_create(name, entity_type)
entity.attributes[attribute] = value
entity.updated_at = datetime.now().isoformat()
def add_relationship(self, entity1_name: str, entity1_type: str,
relationship: str,
entity2_name: str, entity2_type: str):
"""Add a relationship between entities"""
entity1 = self.get_or_create(entity1_name, entity1_type)
entity2 = self.get_or_create(entity2_name, entity2_type)
entity1.relationships.append({
"type": relationship,
"target": f"{entity2_type}:{entity2_name}",
"created_at": datetime.now().isoformat()
})
def record_mention(self, name: str, entity_type: str,
context: str, source: str = None):
"""Record that an entity was mentioned"""
entity = self.get_or_create(name, entity_type)
entity.mentions.append({
"context": context,
"source": source,
"timestamp": datetime.now().isoformat()
})
def get_entity_context(self, name: str, entity_type: str) -> str:
"""Get a natural language summary of an entity"""
key = f"{entity_type}:{name.lower()}"
if key not in self.entities:
return f"No information stored about {name}"
entity = self.entities[key]
context = f"{entity.name} ({entity.entity_type})\n"
if entity.attributes:
context += "Attributes:\n"
for attr, value in entity.attributes.items():
context += f" - {attr}: {value}\n"
if entity.relationships:
context += "Relationships:\n"
for rel in entity.relationships:
context += f" - {rel['type']} {rel['target']}\n"
if entity.mentions:
context += f"Mentioned {len(entity.mentions)} times\n"
return context
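A quick usage sketch with made-up entities, showing how attributes, relationships, and mentions accumulate:

entities = EntityMemory()
entities.update_attribute("Dana", "person", "role", "engineering manager")
entities.add_relationship("Dana", "person", "works_on", "Atlas", "project")
entities.record_mention("Dana", "person", "Dana approved the migration plan")

print(entities.get_entity_context("Dana", "person"))
# -> "Dana (person)" followed by attribute, relationship, and mention lines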
Automatic Entity Extraction
Let the LLM extract entities from conversations:
import json

class EntityExtractor:
def __init__(self, llm, entity_memory: EntityMemory):
self.llm = llm
self.entity_memory = entity_memory
def extract_and_store(self, text: str, source: str = None):
"""Extract entities from text and store them"""
extraction_prompt = f"""
Extract entities from this text. For each entity, provide:
- name: The entity name
- type: person, company, project, product, concept, location
- attributes: Any attributes mentioned (as key-value pairs)
- relationships: Any relationships to other entities
Text: {text}
Respond in JSON format:
{{
"entities": [
{{
"name": "...",
"type": "...",
"attributes": {{"key": "value"}},
"relationships": [{{"type": "...", "target": "..."}}]
}}
]
}}
"""
response = self.llm.generate(extraction_prompt)
entities = json.loads(response)
for entity_data in entities.get("entities", []):
name = entity_data["name"]
entity_type = entity_data["type"]
# Store attributes
for attr, value in entity_data.get("attributes", {}).items():
self.entity_memory.update_attribute(name, entity_type, attr, value)
# Store relationships
for rel in entity_data.get("relationships", []):
target_parts = rel["target"].split(":")
target_type = target_parts[0] if len(target_parts) > 1 else "unknown"
target_name = target_parts[-1]
self.entity_memory.add_relationship(
name, entity_type,
rel["type"],
target_name, target_type
)
# Record the mention
self.entity_memory.record_mention(name, entity_type, text, source)
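One practical caveat: json.loads(response) assumes the model returns bare JSON, but models frequently wrap output in markdown fences. A small defensive parser, as a sketch:

import re

def parse_json_response(response: str) -> dict:
    """Strip markdown code fences before parsing; fail soft on bad JSON."""
    cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", response.strip())
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        # Better to skip one extraction than crash the whole turn
        return {"entities": []}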
Integrating Memory Systems
A complete agent uses all three memory types together:
class MemoryIntegratedAgent:
def __init__(self, llm):
self.llm = llm
self.short_term = ShortTermMemory(max_messages=50)
self.long_term = LongTermMemory()
self.entities = EntityMemory()
self.entity_extractor = EntityExtractor(llm, self.entities)
def build_context(self, user_message: str) -> str:
"""Build complete context from all memory systems"""
context_parts = []
# Long-term memories relevant to this query
relevant_memories = self.long_term.get_relevant_memories(user_message)
if relevant_memories:
context_parts.append("Relevant memories:")
for mem_type, content in relevant_memories:
context_parts.append(f" [{mem_type}] {content}")
# User preferences
prefs = self.long_term.memory.get("preferences", {})
if prefs:
context_parts.append("User preferences:")
for key, data in prefs.items():
context_parts.append(f" - {key}: {data['value']}")
# Relevant entities
# (In practice, extract entity names from user_message first)
# This is simplified for illustration
return "\n".join(context_parts)
def run(self, user_message: str) -> str:
# Build context from memories
memory_context = self.build_context(user_message)
# Add user message to short-term memory
self.short_term.add("user", user_message)
# Generate response with full context
system_prompt = f"""You are a helpful assistant with memory.
{memory_context}
Use this context when relevant, but don't force it if not applicable."""
messages = self.short_term.get_messages()
response = self.llm.generate(
system=system_prompt,
messages=messages
)
# Add response to short-term memory
self.short_term.add("assistant", response)
# Extract entities from the conversation
self.entity_extractor.extract_and_store(
f"User: {user_message}\nAssistant: {response}"
)
# Periodically store important information to long-term memory
self._maybe_consolidate()
return response
def _maybe_consolidate(self):
"""Periodically consolidate short-term to long-term memory"""
if len(self.short_term.messages) > 40:
# Summarize and store
summary = self._summarize_conversation()
self.long_term.store_conversation_summary(summary)
# Extract and store any preferences or facts
self._extract_learnings()
def _summarize_conversation(self) -> str:
messages = self.short_term.get_messages()
prompt = f"""Summarize this conversation, highlighting:
- Key decisions made
- Important information shared
- Any commitments or follow-ups
Conversation:
{self._format_messages(messages)}
"""
return self.llm.generate(prompt)
    def _format_messages(self, messages: list) -> str:
        # ShortTermMemory has no formatter of its own, so the agent defines one
        return "\n".join([f"{m['role']}: {m['content']}" for m in messages])

    def _extract_learnings(self):
        messages = self.short_term.get_messages()
prompt = f"""From this conversation, extract:
1. Any user preferences expressed
2. Important facts to remember
3. Things that went well or poorly (for improving future interactions)
Respond in JSON format.
Conversation:
{self._format_messages(messages)}
"""
learnings = json.loads(self.llm.generate(prompt))
for pref in learnings.get("preferences", []):
self.long_term.store_preference(pref["key"], pref["value"])
for fact in learnings.get("facts", []):
self.long_term.store_fact(fact)
for learning in learnings.get("learnings", []):
self.long_term.store_learning(learning)
Memory Best Practices
1. Decay and Forgetting
Not all memories should last forever:
def decay_memories(self, half_life_days: int = 30):
"""Reduce confidence of old memories"""
now = datetime.now()
for fact in self.memory["facts"]:
created = datetime.fromisoformat(fact["timestamp"])
age_days = (now - created).days
# Exponential decay
decay_factor = 0.5 ** (age_days / half_life_days)
fact["confidence"] *= decay_factor
# Remove very low confidence memories
self.memory["facts"] = [
f for f in self.memory["facts"]
if f["confidence"] > 0.1
]
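To make the numbers concrete: with a 30-day half-life, a 60-day-old fact retains 0.5² = 25% of its confidence, and a fact that started at 1.0 falls below the 0.1 cutoff after roughly 100 days. Note that the in-place multiply compounds across calls, so either run this once per fact lifetime or recompute decay from a stored base confidence.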
2. Deduplication
Avoid storing the same information multiple times:
def store_fact_deduplicated(self, fact: str, threshold: float = 0.9):
    """Only store if sufficiently different from existing facts"""
    # Assumes an embed() helper on this class and a standalone
    # cosine_similarity() function (a sketch follows below)
    fact_embedding = self.embed(fact)
for existing in self.memory["facts"]:
existing_embedding = self.embed(existing["fact"])
similarity = cosine_similarity(fact_embedding, existing_embedding)
if similarity > threshold:
# Update existing instead of adding new
existing["confidence"] = max(existing["confidence"], 0.9)
existing["timestamp"] = datetime.now().isoformat()
return
# No duplicate found, add new
self.store_fact(fact)
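The snippet above assumes an embed() method and a standalone cosine_similarity() function. A minimal sketch of the latter with NumPy:

import numpy as np

def cosine_similarity(a, b) -> float:
    """Cosine similarity between two embedding vectors."""
    a, b = np.asarray(a), np.asarray(b)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))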
3. Privacy and Deletion
Allow users to control their data:
def forget_entity(self, name: str, entity_type: str):
"""Remove all information about an entity"""
key = f"{entity_type}:{name.lower()}"
if key in self.entities:
del self.entities[key]
def forget_user_data(self, user_id: str):
    """Remove all data for a user"""
    # Assumes each store exposes a delete_user() method; the classes above
    # would need one added for multi-user deployments
    self.long_term.delete_user(user_id)
self.entities.delete_user(user_id)
self.short_term.clear()
4. Memory Compression
Keep memory efficient:
def compress_memories(self):
"""Consolidate similar memories"""
facts = self.memory["facts"]
    # Group similar facts (a sketch of these helpers follows below)
    groups = self._cluster_by_similarity(facts, threshold=0.8)
# Merge each group into a single, comprehensive fact
compressed = []
for group in groups:
if len(group) == 1:
compressed.append(group[0])
else:
merged = self._merge_facts(group)
compressed.append(merged)
self.memory["facts"] = compressed
Conclusion
Memory transforms AI agents from stateless query engines into intelligent systems that learn and grow. By implementing short-term, long-term, and entity memory, you create agents that:
- Maintain context within conversations
- Remember important information across sessions
- Build structured knowledge about their world
- Improve over time based on experience
The key is matching memory type to purpose:
- Short-term: Current conversation flow
- Long-term: Persistent knowledge and preferences
- Entity: Structured facts about specific things
With proper memory, your agents become true partners rather than tools that need constant re-explanation.
Ready to learn how agents improve themselves? Check out The Reflection Pattern for self-improvement techniques.