docs: Update hooks lesson with improved context tracker example

- Replace simple Stop-only context-usage hook with hook pair pattern
- Add UserPromptSubmit + Stop hook combination for tracking delta
- Include both char-estimation and tiktoken versions as separate files
- Show how to use session_id for isolated state tracking
This commit is contained in:
Luong NGUYEN
2025-12-25 18:10:16 +01:00
parent fa9603af11
commit 8ef1e4a0c0
3 changed files with 409 additions and 135 deletions
+134 -135
View File
@@ -480,170 +480,145 @@ if __name__ == "__main__":
}
```
### Example 6: Context Usage Reporter (Stop Hook)
### Example 6: Context Usage Tracker (Hook Pairs)
This example shows how to create a hook that reports context/token usage after each Claude response. It reads the conversation transcript and estimates token usage.
Track token consumption per request using `UserPromptSubmit` (pre-message) and `Stop` (post-response) hooks together.
**How it works:**
1. The hook receives `transcript_path` in the JSON input - this points to a JSONL file containing all conversation messages
2. The script reads the transcript file and calculates total character count
3. It estimates tokens using a simple heuristic (~4 characters per token)
4. Outputs a one-line report showing estimated usage vs model capacity
**File:** `.claude/hooks/context-usage.py`
**File:** `.claude/hooks/context-tracker.py`
```python
#!/usr/bin/env python3
"""
Context Usage Reporter Hook
Context Usage Tracker - Tracks token consumption per request.
Reports estimated context/token usage after each Claude response.
Uses the transcript_path field to read conversation history and estimate tokens.
Uses UserPromptSubmit as "pre-message" hook and Stop as "post-response" hook
to calculate the delta in token usage for each request.
Limitations:
- Token count is an ESTIMATE (~4 chars/token average)
- Actual token usage depends on the tokenizer and includes system prompts
- Use /context command for accurate real-time usage
Token Counting Methods:
1. Character estimation (default): ~4 chars per token, no dependencies
2. tiktoken (optional): More accurate (~90-95%), requires: pip install tiktoken
"""
import json
import sys
import os
import sys
import tempfile
# Model context limits (adjust based on your model)
MODEL_LIMITS = {
"default": 200000, # Claude Opus 4.5 / Sonnet
"haiku": 200000,
}
# Configuration
CONTEXT_LIMIT = 128000 # Claude's context window (adjust for your model)
USE_TIKTOKEN = False # Set True if tiktoken is installed for better accuracy
def read_transcript(transcript_path: str) -> list:
"""Read JSONL transcript file and return list of messages."""
messages = []
if not os.path.exists(transcript_path):
return messages
with open(transcript_path, 'r', encoding='utf-8') as f:
def get_state_file(session_id: str) -> str:
"""Get temp file path for storing pre-message token count, isolated by session."""
return os.path.join(tempfile.gettempdir(), f"claude-context-{session_id}.json")
def count_tokens(text: str) -> int:
"""
Count tokens in text.
Uses tiktoken with p50k_base encoding if available (~90-95% accuracy),
otherwise falls back to character estimation (~80-90% accuracy).
"""
if USE_TIKTOKEN:
try:
import tiktoken
enc = tiktoken.get_encoding("p50k_base")
return len(enc.encode(text))
except ImportError:
pass # Fall back to estimation
# Character-based estimation: ~4 characters per token for English
return len(text) // 4
def read_transcript(transcript_path: str) -> str:
"""Read and concatenate all content from transcript file."""
if not transcript_path or not os.path.exists(transcript_path):
return ""
content = []
with open(transcript_path, "r") as f:
for line in f:
line = line.strip()
if line:
try:
messages.append(json.loads(line))
except json.JSONDecodeError:
continue
return messages
try:
entry = json.loads(line.strip())
# Extract text content from various message formats
if "message" in entry:
msg = entry["message"]
if isinstance(msg.get("content"), str):
content.append(msg["content"])
elif isinstance(msg.get("content"), list):
for block in msg["content"]:
if isinstance(block, dict) and block.get("type") == "text":
content.append(block.get("text", ""))
except json.JSONDecodeError:
continue
def calculate_usage(messages: list) -> tuple[int, int]:
"""Calculate total characters and estimated tokens from messages."""
total_chars = 0
return "\n".join(content)
for msg in messages:
# Handle different message formats in transcript
if isinstance(msg, dict):
# Check common content fields
content = msg.get('content', '')
if isinstance(content, str):
total_chars += len(content)
elif isinstance(content, list):
# Handle content blocks (text, tool_use, etc.)
for block in content:
if isinstance(block, dict):
text = block.get('text', '') or block.get('content', '')
total_chars += len(str(text))
elif isinstance(block, str):
total_chars += len(block)
# Also count tool inputs/outputs
tool_input = msg.get('tool_input', {})
if tool_input:
total_chars += len(json.dumps(tool_input))
def handle_user_prompt_submit(data: dict) -> None:
"""Pre-message hook: Save current token count before request."""
session_id = data.get("session_id", "unknown")
transcript_path = data.get("transcript_path", "")
transcript_content = read_transcript(transcript_path)
current_tokens = count_tokens(transcript_content)
# Save to temp file for later comparison
state_file = get_state_file(session_id)
with open(state_file, "w") as f:
json.dump({"pre_tokens": current_tokens}, f)
def handle_stop(data: dict) -> None:
"""Post-response hook: Calculate and report token delta."""
session_id = data.get("session_id", "unknown")
transcript_path = data.get("transcript_path", "")
transcript_content = read_transcript(transcript_path)
current_tokens = count_tokens(transcript_content)
# Load pre-message count
state_file = get_state_file(session_id)
pre_tokens = 0
if os.path.exists(state_file):
try:
with open(state_file, "r") as f:
state = json.load(f)
pre_tokens = state.get("pre_tokens", 0)
except (json.JSONDecodeError, IOError):
pass
# Calculate delta
delta_tokens = current_tokens - pre_tokens
remaining = CONTEXT_LIMIT - current_tokens
percentage = (current_tokens / CONTEXT_LIMIT) * 100
# Report usage
method = "tiktoken" if USE_TIKTOKEN else "estimated"
print(f"Context ({method}): ~{current_tokens:,} tokens ({percentage:.1f}% used, ~{remaining:,} remaining)", file=sys.stderr)
if delta_tokens > 0:
print(f"This request: ~{delta_tokens:,} tokens", file=sys.stderr)
estimated_tokens = total_chars // 4 # ~4 characters per token
return total_chars, estimated_tokens
def main():
# Read hook input from stdin
input_data = json.load(sys.stdin)
data = json.load(sys.stdin)
event = data.get("hook_event_name", "")
# Get transcript path from hook input
transcript_path = input_data.get('transcript_path', '')
if event == "UserPromptSubmit":
handle_user_prompt_submit(data)
elif event == "Stop":
handle_stop(data)
if not transcript_path:
# No transcript available, exit silently
sys.exit(0)
# Read and analyze transcript
messages = read_transcript(transcript_path)
total_chars, estimated_tokens = calculate_usage(messages)
# Get model limit (default to 200k)
max_tokens = MODEL_LIMITS.get("default", 200000)
# Calculate percentages
used_percent = (estimated_tokens / max_tokens) * 100
remaining_tokens = max_tokens - estimated_tokens
remaining_percent = 100 - used_percent
# Format the report (output as systemMessage so it appears in UI)
report = f"Context: ~{estimated_tokens:,}/{max_tokens:,} tokens ({remaining_percent:.1f}% remaining)"
# Output JSON with systemMessage to show in Claude Code UI
output = {
"systemMessage": report
}
print(json.dumps(output))
sys.exit(0)
if __name__ == "__main__":
main()
```
**Configuration:**
```json
{
"hooks": {
"Stop": [
{
"hooks": [
{
"type": "command",
"command": "python3 \"$CLAUDE_PROJECT_DIR/.claude/hooks/context-usage.py\"",
"timeout": 5
}
]
}
]
}
}
```
**Sample Output:**
After each Claude response, you'll see a message like:
```
Context: ~45,230/200,000 tokens (77.4% remaining)
```
**Key Points:**
| Aspect | Details |
|--------|---------|
| **Event** | `Stop` - runs after Claude finishes responding |
| **Input** | Uses `transcript_path` field to access conversation history |
| **Estimation** | ~4 characters per token (rough heuristic) |
| **Output** | `systemMessage` field displays in Claude Code UI |
| **Accuracy** | Estimate only - use `/context` for exact counts |
**Why use Stop hook instead of UserPromptSubmit?**
- `Stop` runs after Claude responds, giving a more complete picture
- `UserPromptSubmit` runs before Claude processes, missing the response
- Both work, but `Stop` shows total usage including Claude's response
**Alternative: UserPromptSubmit for Pre-Response Check**
If you want to check context BEFORE Claude processes your prompt:
```json
{
"hooks": {
@@ -652,7 +627,17 @@ If you want to check context BEFORE Claude processes your prompt:
"hooks": [
{
"type": "command",
"command": "python3 \"$CLAUDE_PROJECT_DIR/.claude/hooks/context-usage.py\""
"command": "python3 \"$CLAUDE_PROJECT_DIR/.claude/hooks/context-tracker.py\""
}
]
}
],
"Stop": [
{
"hooks": [
{
"type": "command",
"command": "python3 \"$CLAUDE_PROJECT_DIR/.claude/hooks/context-tracker.py\""
}
]
}
@@ -661,6 +646,20 @@ If you want to check context BEFORE Claude processes your prompt:
}
```
**How it works:**
1. `UserPromptSubmit` fires before your prompt is processed - saves current token count
2. `Stop` fires after Claude responds - calculates delta and reports usage
3. Each session is isolated via `session_id` in the temp filename
**Token Counting Methods:**
| Method | Accuracy | Dependencies | Speed |
|--------|----------|--------------|-------|
| Character estimation | ~80-90% | None | <1ms |
| tiktoken (p50k_base) | ~90-95% | `pip install tiktoken` | <10ms |
> **Note:** Anthropic hasn't released an official offline tokenizer. Both methods are approximations. The transcript includes user prompts, Claude's responses, and tool outputs, but NOT system prompts or internal context.
## MCP Tool Hooks
MCP tools follow the pattern `mcp__<server>__<tool>`:
+149
View File
@@ -0,0 +1,149 @@
#!/usr/bin/env python3
"""
Context Usage Tracker (tiktoken version) - Tracks token consumption per request.
Uses UserPromptSubmit as "pre-message" hook and Stop as "post-response" hook
to calculate the delta in token usage for each request.
This version uses tiktoken with p50k_base encoding for ~90-95% accuracy.
Requires: pip install tiktoken
For a zero-dependency version, see context-tracker.py.
Usage:
Configure both hooks to use the same script:
- UserPromptSubmit: saves current token count
- Stop: calculates delta and reports usage
"""
import json
import os
import sys
import tempfile
try:
import tiktoken
TIKTOKEN_AVAILABLE = True
except ImportError:
TIKTOKEN_AVAILABLE = False
print(
"Warning: tiktoken not installed. Install with: pip install tiktoken",
file=sys.stderr,
)
# Configuration
CONTEXT_LIMIT = 128000 # Claude's context window (adjust for your model)
def get_state_file(session_id: str) -> str:
"""Get temp file path for storing pre-message token count, isolated by session."""
return os.path.join(tempfile.gettempdir(), f"claude-context-{session_id}.json")
def count_tokens(text: str) -> int:
"""
Count tokens using tiktoken with p50k_base encoding.
This provides ~90-95% accuracy compared to Claude's actual tokenizer.
Falls back to character estimation if tiktoken is not available.
Note: Anthropic hasn't released an official offline tokenizer.
tiktoken with p50k_base is a reasonable approximation since both
Claude and GPT models use BPE (byte-pair encoding).
"""
if TIKTOKEN_AVAILABLE:
enc = tiktoken.get_encoding("p50k_base")
return len(enc.encode(text))
else:
# Fallback to character estimation (~4 chars per token)
return len(text) // 4
def read_transcript(transcript_path: str) -> str:
"""Read and concatenate all content from transcript file."""
if not transcript_path or not os.path.exists(transcript_path):
return ""
content = []
with open(transcript_path, "r") as f:
for line in f:
try:
entry = json.loads(line.strip())
# Extract text content from various message formats
if "message" in entry:
msg = entry["message"]
if isinstance(msg.get("content"), str):
content.append(msg["content"])
elif isinstance(msg.get("content"), list):
for block in msg["content"]:
if isinstance(block, dict) and block.get("type") == "text":
content.append(block.get("text", ""))
except json.JSONDecodeError:
continue
return "\n".join(content)
def handle_user_prompt_submit(data: dict) -> None:
"""Pre-message hook: Save current token count before request."""
session_id = data.get("session_id", "unknown")
transcript_path = data.get("transcript_path", "")
transcript_content = read_transcript(transcript_path)
current_tokens = count_tokens(transcript_content)
# Save to temp file for later comparison
state_file = get_state_file(session_id)
with open(state_file, "w") as f:
json.dump({"pre_tokens": current_tokens}, f)
def handle_stop(data: dict) -> None:
"""Post-response hook: Calculate and report token delta."""
session_id = data.get("session_id", "unknown")
transcript_path = data.get("transcript_path", "")
transcript_content = read_transcript(transcript_path)
current_tokens = count_tokens(transcript_content)
# Load pre-message count
state_file = get_state_file(session_id)
pre_tokens = 0
if os.path.exists(state_file):
try:
with open(state_file, "r") as f:
state = json.load(f)
pre_tokens = state.get("pre_tokens", 0)
except (json.JSONDecodeError, IOError):
pass
# Calculate delta
delta_tokens = current_tokens - pre_tokens
remaining = CONTEXT_LIMIT - current_tokens
percentage = (current_tokens / CONTEXT_LIMIT) * 100
# Report usage (stderr so it doesn't interfere with hook output)
method = "tiktoken" if TIKTOKEN_AVAILABLE else "estimated"
print(
f"Context ({method}): ~{current_tokens:,} tokens "
f"({percentage:.1f}% used, ~{remaining:,} remaining)",
file=sys.stderr,
)
if delta_tokens > 0:
print(f"This request: ~{delta_tokens:,} tokens", file=sys.stderr)
def main():
data = json.load(sys.stdin)
event = data.get("hook_event_name", "")
if event == "UserPromptSubmit":
handle_user_prompt_submit(data)
elif event == "Stop":
handle_stop(data)
sys.exit(0)
if __name__ == "__main__":
main()
+126
View File
@@ -0,0 +1,126 @@
#!/usr/bin/env python3
"""
Context Usage Tracker - Tracks token consumption per request.
Uses UserPromptSubmit as "pre-message" hook and Stop as "post-response" hook
to calculate the delta in token usage for each request.
This version uses character-based estimation (no dependencies).
For better accuracy, see context-tracker-tiktoken.py.
Usage:
Configure both hooks to use the same script:
- UserPromptSubmit: saves current token count
- Stop: calculates delta and reports usage
"""
import json
import os
import sys
import tempfile
# Configuration
CONTEXT_LIMIT = 128000 # Claude's context window (adjust for your model)
def get_state_file(session_id: str) -> str:
"""Get temp file path for storing pre-message token count, isolated by session."""
return os.path.join(tempfile.gettempdir(), f"claude-context-{session_id}.json")
def count_tokens_estimate(text: str) -> int:
"""
Estimate token count using character-based approximation.
Uses ~4 characters per token ratio, which provides ~80-90% accuracy
for English text. Less accurate for code and non-English text.
"""
return len(text) // 4
def read_transcript(transcript_path: str) -> str:
"""Read and concatenate all content from transcript file."""
if not transcript_path or not os.path.exists(transcript_path):
return ""
content = []
with open(transcript_path, "r") as f:
for line in f:
try:
entry = json.loads(line.strip())
# Extract text content from various message formats
if "message" in entry:
msg = entry["message"]
if isinstance(msg.get("content"), str):
content.append(msg["content"])
elif isinstance(msg.get("content"), list):
for block in msg["content"]:
if isinstance(block, dict) and block.get("type") == "text":
content.append(block.get("text", ""))
except json.JSONDecodeError:
continue
return "\n".join(content)
def handle_user_prompt_submit(data: dict) -> None:
"""Pre-message hook: Save current token count before request."""
session_id = data.get("session_id", "unknown")
transcript_path = data.get("transcript_path", "")
transcript_content = read_transcript(transcript_path)
current_tokens = count_tokens_estimate(transcript_content)
# Save to temp file for later comparison
state_file = get_state_file(session_id)
with open(state_file, "w") as f:
json.dump({"pre_tokens": current_tokens}, f)
def handle_stop(data: dict) -> None:
"""Post-response hook: Calculate and report token delta."""
session_id = data.get("session_id", "unknown")
transcript_path = data.get("transcript_path", "")
transcript_content = read_transcript(transcript_path)
current_tokens = count_tokens_estimate(transcript_content)
# Load pre-message count
state_file = get_state_file(session_id)
pre_tokens = 0
if os.path.exists(state_file):
try:
with open(state_file, "r") as f:
state = json.load(f)
pre_tokens = state.get("pre_tokens", 0)
except (json.JSONDecodeError, IOError):
pass
# Calculate delta
delta_tokens = current_tokens - pre_tokens
remaining = CONTEXT_LIMIT - current_tokens
percentage = (current_tokens / CONTEXT_LIMIT) * 100
# Report usage (stderr so it doesn't interfere with hook output)
print(
f"Context (estimated): ~{current_tokens:,} tokens "
f"({percentage:.1f}% used, ~{remaining:,} remaining)",
file=sys.stderr,
)
if delta_tokens > 0:
print(f"This request: ~{delta_tokens:,} tokens", file=sys.stderr)
def main():
data = json.load(sys.stdin)
event = data.get("hook_event_name", "")
if event == "UserPromptSubmit":
handle_user_prompt_submit(data)
elif event == "Stop":
handle_stop(data)
sys.exit(0)
if __name__ == "__main__":
main()