Use Agent Memory Short-Term APIs with LangGraph

LangGraph applications often need to preserve recent working context without passing the full conversation back to the model on every turn. If you only keep the latest message in graph state, the model can easily lose track of prior task details, intermediate progress, or the active thread topic.

In this article, you will use Agent Memory short-term APIs inside a LangGraph flow so the graph can recover recent thread context on demand. The flow uses get_summary() to carry forward a compact summary of earlier messages and get_context_card() to surface the records most relevant to the latest user turn.

In this article, you will learn how to:

- Configure Agent Memory and LangGraph for short-term context.
- Build a LangGraph flow that loads a thread summary and context card before each model call.
- Answer a later turn using only the latest user message plus the loaded short-term context.

Tip: For package setup, see Get Started with Agent Memory. If you need a local Oracle AI Database for this example, see Run Oracle AI Database Locally.

Configure Agent Memory and LangGraph

Create an Agent Memory client with an Oracle Database connection or connection pool, configure an Embedder for vector search, and use ChatOpenAI as the model for the LangGraph node.

from typing import Any

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from langgraph.graph import END, START, MessagesState, StateGraph

from oracleagentmemory.core.embedders.embedder import Embedder
from oracleagentmemory.core.oracleagentmemory import OracleAgentMemory

embedder = Embedder(
    model="YOUR_EMBEDDING_MODEL",
    api_base="YOUR_EMBEDDING_BASE_URL",
    api_key="YOUR_EMBEDDING_API_KEY",
)
langgraph_llm = ChatOpenAI(
    model="YOUR_CHAT_MODEL",
    base_url="YOUR_CHAT_BASE_URL",
    api_key="YOUR_CHAT_API_KEY",
    temperature=0,
)
db_pool = ...  #an oracledb connection or connection pool



class ShortTermState(MessagesState):
    """LangGraph state extended with Oracle Agent Memory short-term context."""

    thread_summary: str
    context_card: str


agent_memory = OracleAgentMemory(
    connection=db_pool,
    embedder=embedder,
    extract_memories=False,
)
thread = agent_memory.create_thread(
    thread_id="langgraph_short_term_demo",
    user_id="user_123",
    agent_id="assistant_456",
)
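
The db_pool placeholder above stands for any python-oracledb connection or connection pool. The following is a minimal sketch that creates a standalone pool with oracledb.create_pool; the YOUR_DB_* values are placeholders for your own credentials and connect string, and the pool sizing is illustrative only.

import oracledb

#A minimal sketch of the db_pool placeholder: a small python-oracledb connection pool.
#Replace the placeholders with your database user, password, and connect string.
db_pool = oracledb.create_pool(
    user="YOUR_DB_USER",
    password="YOUR_DB_PASSWORD",
    dsn="YOUR_DB_CONNECT_STRING",
    min=1,
    max=4,
    increment=1,
)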

Build a Flow that Loads Short-Term Context

Before each model call, the flow reads thread.get_summary(except_last=1) and thread.get_context_card() from Agent Memory. This lets the graph keep only the latest user message in LangGraph state while still recovering recent working context from the thread.

def _message_text(message: Any) -> str:
    content = getattr(message, "content", "")
    if isinstance(content, str):
        return content
    return str(content)


def load_short_term_context(_: ShortTermState) -> dict[str, str]:
    summary_messages = thread.get_summary(except_last=1, token_budget=250)
    summary_text = (
        summary_messages[0].content if summary_messages else "No prior thread summary."
    )
    context_card = thread.get_context_card()
    if not context_card:
        context_card = "<context_card>\n  No relevant short-term context yet.\n</context_card>"
    return {
        "thread_summary": summary_text,
        "context_card": context_card,
    }


def call_model(state: ShortTermState) -> dict[str, list[Any]]:
    response = langgraph_llm.invoke(
        [
            SystemMessage(
                content=(
                    "You are a helpful engineering assistant. "
                    "Answer in at most two short sentences. "
                    "Use the Oracle Agent Memory short-term context below.\n\n"
                    f"Thread summary:\n{state['thread_summary']}\n\n"
                    f"Context card:\n{state['context_card']}"
                )
            ),
            HumanMessage(content=_message_text(state["messages"][-1])),
        ]
    )
    return {"messages": [response]}


builder = StateGraph(ShortTermState)
builder.add_node("load_short_term_context", load_short_term_context)
builder.add_node("call_model", call_model)
builder.add_edge(START, "load_short_term_context")
builder.add_edge("load_short_term_context", "call_model")
builder.add_edge("call_model", END)
graph = builder.compile()

Answer in a Later Turn with Summary and Context Card

Append each user and assistant message to the Agent Memory thread, then let the LangGraph flow answer a later turn using only the latest user message plus the loaded short-term context.

def run_turn(user_text: str) -> str:
    thread.add_messages([{"role": "user", "content": user_text}])
    result = graph.invoke({"messages": [HumanMessage(content=user_text)]})
    assistant_text = _message_text(result["messages"][-1])
    thread.add_messages([{"role": "assistant", "content": assistant_text}])
    print("Thread summary:")
    print(result["thread_summary"])
    print("Context card:")
    print(result["context_card"])
    print("Assistant:")
    print(assistant_text)
    return assistant_text


run_turn(
    "I'm Maya. I'm migrating our nightly invoice reconciliation workflow "
    "from cron jobs to LangGraph."
)
run_turn("The failing step right now is ledger enrichment after reconciliation.")
final_answer = run_turn("What workflow am I migrating, which step is failing, and who am I?")

print(final_answer)

Output:

You're Maya, migrating your nightly invoice reconciliation workflow from cron jobs to LangGraph, and the ledger-enrichment step after reconciliation is currently failing.

Conclusion

In this article, you learned how to use Agent Memory short-term APIs inside a LangGraph flow, call get_summary(except_last=1) and get_context_card() before each model call, and answer later turns with recent thread context without resending the full transcript.

Tip: Having learned how to add short-term thread context to a LangGraph flow, you may now proceed to Integrate Agent Memory with LangGraph.

Full Code

#Copyright © 2026 Oracle and/or its affiliates.
#isort:skip_file
#fmt: off
#Agent Memory Code Example - LangGraph Short-Term Memory
#--------------------------------------------------------

#How to use:
#Create a new Python virtual environment and install the latest oracleagentmemory version.

#You can now run the script
#1. As a Python file:
#```bash
#python howto_shorttermmemory.py
#```
#2. As a Notebook (in VSCode):
#When viewing the file,
#- press the keys Ctrl + Enter to run the selected cell
#- or Shift + Enter to run the selected cell and move to the cell below


##Configure Oracle Agent Memory and LangGraph for short-term context

#%%
from typing import Any

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from langgraph.graph import END, START, MessagesState, StateGraph

from oracleagentmemory.core.embedders.embedder import Embedder
from oracleagentmemory.core.oracleagentmemory import OracleAgentMemory

embedder = Embedder(
    model="YOUR_EMBEDDING_MODEL",
    api_base="YOUR_EMBEDDING_BASE_URL",
    api_key="YOUR_EMBEDDING_API_KEY",
)
langgraph_llm = ChatOpenAI(
    model="YOUR_CHAT_MODEL",
    base_url="YOUR_CHAT_BASE_URL",
    api_key="YOUR_CHAT_API_KEY",
    temperature=0,
)
db_pool = ...  #an oracledb connection or connection pool


class ShortTermState(MessagesState):
    """LangGraph state extended with Oracle Agent Memory short-term context."""

    thread_summary: str
    context_card: str


agent_memory = OracleAgentMemory(
    connection=db_pool,
    embedder=embedder,
    extract_memories=False,
)
thread = agent_memory.create_thread(
    thread_id="langgraph_short_term_demo",
    user_id="user_123",
    agent_id="assistant_456",
)


##Build a LangGraph flow that loads short-term context

#%%
def _message_text(message: Any) -> str:
    content = getattr(message, "content", "")
    if isinstance(content, str):
        return content
    return str(content)


def load_short_term_context(_: ShortTermState) -> dict[str, str]:
    summary_messages = thread.get_summary(except_last=1, token_budget=250)
    summary_text = (
        summary_messages[0].content if summary_messages else "No prior thread summary."
    )
    context_card = thread.get_context_card()
    if not context_card:
        context_card = "<context_card>\n  No relevant short-term context yet.\n</context_card>"
    return {
        "thread_summary": summary_text,
        "context_card": context_card,
    }


def call_model(state: ShortTermState) -> dict[str, list[Any]]:
    response = langgraph_llm.invoke(
        [
            SystemMessage(
                content=(
                    "You are a helpful engineering assistant. "
                    "Answer in at most two short sentences. "
                    "Use the Oracle Agent Memory short-term context below.\n\n"
                    f"Thread summary:\n{state['thread_summary']}\n\n"
                    f"Context card:\n{state['context_card']}"
                )
            ),
            HumanMessage(content=_message_text(state["messages"][-1])),
        ]
    )
    return {"messages": [response]}


builder = StateGraph(ShortTermState)
builder.add_node("load_short_term_context", load_short_term_context)
builder.add_node("call_model", call_model)
builder.add_edge(START, "load_short_term_context")
builder.add_edge("load_short_term_context", "call_model")
builder.add_edge("call_model", END)
graph = builder.compile()


##Answer a new turn with summary and context card

#%%
def run_turn(user_text: str) -> str:
    thread.add_messages([{"role": "user", "content": user_text}])
    result = graph.invoke({"messages": [HumanMessage(content=user_text)]})
    assistant_text = _message_text(result["messages"][-1])
    thread.add_messages([{"role": "assistant", "content": assistant_text}])
    print("Thread summary:")
    print(result["thread_summary"])
    print("Context card:")
    print(result["context_card"])
    print("Assistant:")
    print(assistant_text)
    return assistant_text


run_turn(
    "I'm Maya. I'm migrating our nightly invoice reconciliation workflow "
    "from cron jobs to LangGraph."
)
run_turn("The failing step right now is ledger enrichment after reconciliation.")
final_answer = run_turn("What workflow am I migrating, which step is failing, and who am I?")

print(final_answer)
#You're Maya, migrating your nightly invoice reconciliation workflow from cron jobs
#to LangGraph, and the ledger-enrichment step after reconciliation is currently failing.