Integrate Cognee service updates

This commit is contained in:
Songbird
2025-10-03 11:54:01 +02:00
parent 43d3eae1db
commit c7adfabe0a
17 changed files with 1649 additions and 99 deletions

View File

@@ -22,7 +22,7 @@ import typer
from rich.console import Console
from rich.prompt import Confirm, Prompt
from ..config import ensure_project_config
from ..config import ensure_project_config, provision_cognee_service_for_project, get_project_config
from ..database import ensure_project_db
console = Console()
@@ -107,6 +107,20 @@ def project(
_ensure_env_file(fuzzforge_dir, force)
_ensure_agents_registry(fuzzforge_dir, force)
# Provision Cognee service user/tenant/dataset if service mode is enabled
console.print("🧠 Provisioning Cognee service credentials...")
provision_result = provision_cognee_service_for_project(current_dir)
if provision_result["status"] == "success":
console.print(f" ✅ Created user: {provision_result.get('user', 'N/A')}", style="green")
console.print(f" ✅ Created tenant: {provision_result.get('tenant', 'N/A')}", style="green")
console.print(f" ✅ Created dataset: {provision_result.get('dataset', 'N/A')}", style="green")
elif provision_result["status"] == "skipped":
console.print(f" ⏭️ Skipped: {provision_result.get('message', 'N/A')}", style="dim")
elif provision_result["status"] == "error":
console.print(f" ⚠️ Warning: {provision_result.get('message', 'N/A')}", style="yellow")
console.print(" 💡 You can provision later when the service is available", style="dim")
# Create .gitignore if needed
gitignore_path = current_dir / ".gitignore"
gitignore_entries = [
@@ -211,6 +225,21 @@ def _ensure_env_file(fuzzforge_dir: Path, force: bool) -> None:
session_db_path = fuzzforge_dir / "fuzzforge_sessions.db"
session_db_rel = session_db_path.relative_to(fuzzforge_dir.parent)
project_config = get_project_config(fuzzforge_dir.parent)
cognee_cfg = project_config.cognee if project_config else None
service_url_default = "http://localhost:18000"
service_url = os.getenv("COGNEE_SERVICE_URL") or (cognee_cfg.service_url if cognee_cfg and cognee_cfg.service_url else service_url_default)
service_port = os.getenv("COGNEE_SERVICE_PORT") or "18000"
s3_bucket = os.getenv("COGNEE_S3_BUCKET") or (cognee_cfg.s3_bucket if cognee_cfg and cognee_cfg.s3_bucket else "cognee-bucket")
s3_prefix = os.getenv("COGNEE_S3_PREFIX") or (cognee_cfg.s3_prefix if cognee_cfg and cognee_cfg.s3_prefix else "cognee/projects")
service_email = os.getenv("COGNEE_SERVICE_USER_EMAIL") or (cognee_cfg.service_user_email if cognee_cfg and cognee_cfg.service_user_email else "")
service_password = os.getenv("COGNEE_SERVICE_USER_PASSWORD") or (cognee_cfg.service_user_password if cognee_cfg and cognee_cfg.service_user_password else "")
aws_endpoint = os.getenv("COGNEE_AWS_ENDPOINT_URL") or ""
aws_region = os.getenv("COGNEE_AWS_REGION") or ""
aws_access = os.getenv("COGNEE_AWS_ACCESS_KEY_ID") or ""
aws_secret = os.getenv("COGNEE_AWS_SECRET_ACCESS_KEY") or ""
env_lines = [
"# FuzzForge AI configuration",
"# Populate the API key(s) that match your LLM provider",
@@ -228,6 +257,19 @@ def _ensure_env_file(fuzzforge_dir: Path, force: bool) -> None:
"LLM_COGNEE_ENDPOINT=",
"COGNEE_MCP_URL=",
"",
"# Cognee service configuration",
"COGNEE_STORAGE_MODE=service",
f"COGNEE_SERVICE_URL={service_url}",
f"COGNEE_SERVICE_PORT={service_port}",
f"COGNEE_S3_BUCKET={s3_bucket}",
f"COGNEE_S3_PREFIX={s3_prefix}",
f"COGNEE_SERVICE_USER_EMAIL={service_email}",
f"COGNEE_SERVICE_USER_PASSWORD={service_password}",
f"COGNEE_AWS_ENDPOINT_URL={aws_endpoint}",
f"COGNEE_AWS_REGION={aws_region}",
f"COGNEE_AWS_ACCESS_KEY_ID={aws_access}",
f"COGNEE_AWS_SECRET_ACCESS_KEY={aws_secret}",
"",
"# Session persistence options: inmemory | sqlite",
"SESSION_PERSISTENCE=sqlite",
f"SESSION_DB_PATH={session_db_rel}",

View File

@@ -21,7 +21,8 @@ from __future__ import annotations
import hashlib
import os
from pathlib import Path
from typing import Any, Dict, Optional
from typing import Any, Dict, Optional, Literal
import asyncio
try: # Optional dependency; fall back if not installed
from dotenv import load_dotenv
@@ -76,6 +77,14 @@ class CogneeConfig(BaseModel):
backend_access_control: bool = True
project_id: Optional[str] = None
tenant_id: Optional[str] = None
mode: Literal["embedded", "service"] = "embedded"
storage_backend: Literal["filesystem", "s3"] = "filesystem"
s3_bucket: Optional[str] = None
s3_prefix: Optional[str] = None
service_env_dir: Optional[str] = None
service_user_email: Optional[str] = None
service_user_password: Optional[str] = None
service_url: Optional[str] = None
class FuzzForgeConfig(BaseModel):
@@ -138,30 +147,269 @@ class FuzzForgeConfig(BaseModel):
if not cognee.tenant_id:
cognee.tenant_id = self.project.tenant_id
changed = True
base_dir = project_dir / ".fuzzforge" / "cognee" / f"project_{self.project.id}"
data_dir = base_dir / "data"
system_dir = base_dir / "system"
for path in (
base_dir,
data_dir,
system_dir,
system_dir / "kuzu_db",
system_dir / "lancedb",
):
if not path.exists():
path.mkdir(parents=True, exist_ok=True)
if cognee.data_directory != str(data_dir):
cognee.data_directory = str(data_dir)
if not cognee.service_user_email or cognee.service_user_email.endswith("@cognee.local"):
cognee.service_user_email = f"project_{self.project.id}@cognee.dev"
changed = True
if cognee.system_directory != str(system_dir):
cognee.system_directory = str(system_dir)
if not cognee.service_user_password:
cognee.service_user_password = f"{self.project.id[:8]}_C0gn33!"
changed = True
mode_env = (os.getenv("COGNEE_STORAGE_MODE") or os.getenv("COGNEE_SERVICE_MODE"))
if mode_env:
normalized = mode_env.strip().lower()
if normalized in {"embedded", "service"} and normalized != cognee.mode:
cognee.mode = normalized # type: ignore[assignment]
changed = True
service_url_env = os.getenv("COGNEE_SERVICE_URL") or os.getenv("COGNEE_API_URL")
if service_url_env and cognee.service_url != service_url_env:
cognee.service_url = service_url_env
changed = True
if cognee.mode not in {"embedded", "service"}:
cognee.mode = "service"
changed = True
if cognee.mode != "service":
cognee.mode = "service"
changed = True
if cognee.mode == "service":
bucket = cognee.s3_bucket or os.getenv("COGNEE_S3_BUCKET") or os.getenv("S3_BUCKET")
if bucket and cognee.s3_bucket != bucket:
cognee.s3_bucket = bucket
changed = True
prefix = cognee.s3_prefix or os.getenv("COGNEE_S3_PREFIX")
if not prefix:
prefix = f"cognee/projects/{self.project.id}"
if cognee.s3_prefix != prefix:
cognee.s3_prefix = prefix
changed = True
if bucket:
data_dir = f"s3://{bucket}/{prefix}/data"
system_dir = f"s3://{bucket}/{prefix}/system"
if cognee.data_directory != data_dir:
cognee.data_directory = data_dir
changed = True
if cognee.system_directory != system_dir:
cognee.system_directory = system_dir
changed = True
if cognee.storage_backend != "s3":
cognee.storage_backend = "s3"
changed = True
service_dir = (
project_dir
/ ".fuzzforge"
/ "cognee"
/ "service"
/ f"project_{self.project.id}"
)
if cognee.service_env_dir != str(service_dir):
cognee.service_env_dir = str(service_dir)
changed = True
service_dir.mkdir(parents=True, exist_ok=True)
self._write_cognee_service_env(project_dir)
if cognee.mode == "embedded":
base_dir = project_dir / ".fuzzforge" / "cognee" / f"project_{self.project.id}"
data_dir = base_dir / "data"
system_dir = base_dir / "system"
for path in (
base_dir,
data_dir,
system_dir,
system_dir / "kuzu_db",
system_dir / "lancedb",
):
if not path.exists():
path.mkdir(parents=True, exist_ok=True)
if cognee.data_directory != str(data_dir):
cognee.data_directory = str(data_dir)
changed = True
if cognee.system_directory != str(system_dir):
cognee.system_directory = str(system_dir)
changed = True
if cognee.storage_backend != "filesystem":
cognee.storage_backend = "filesystem"
changed = True
if cognee.service_env_dir is not None:
cognee.service_env_dir = None
changed = True
if cognee.service_url is not None:
cognee.service_url = None
changed = True
return changed
def _write_cognee_service_env(self, project_dir: Path) -> None:
    """Generate a service .env file for Cognee containers.

    Writes ``<service_env_dir>/.env``, replacing any previous content. Does
    nothing unless Cognee runs in ``service`` mode and ``service_env_dir`` is
    set.

    Values come from the Cognee section of this config first. AWS and LLM
    settings are read from the environment only. The service password falls
    back to ``COGNEE_SERVICE_USER_PASSWORD`` when the config has none.

    Args:
        project_dir: Project root. Unused here; the output location comes
            from ``self.cognee.service_env_dir``.
    """
    cognee = self.cognee
    if cognee.mode != "service" or not cognee.service_env_dir:
        return

    service_dir = Path(cognee.service_env_dir)
    service_dir.mkdir(parents=True, exist_ok=True)

    def _env(*names: str) -> str:
        # First non-empty environment value wins; "" means "not configured".
        return next((value for value in map(os.getenv, names) if value), "")

    def _optional(key: str, value: str) -> list[str]:
        # Optional settings are left out of the file entirely when blank.
        return [f"{key}={value}"] if value else []

    email = cognee.service_user_email or ""
    # Config wins; the environment is only a fallback for a missing password.
    password = cognee.service_user_password or os.getenv(
        "COGNEE_SERVICE_USER_PASSWORD", ""
    )
    aws_region = _env("COGNEE_AWS_REGION", "AWS_REGION", "S3_REGION")

    lines = [
        "# Auto-generated by FuzzForge. Updates when project config changes.",
        "COGNEE_STORAGE_MODE=service",
        # NOTE(review): written unconditionally, even when no bucket is
        # configured; confirm this is intended.
        "STORAGE_BACKEND=s3",
        *_optional(
            "STORAGE_BUCKET_NAME",
            cognee.s3_bucket or _env("COGNEE_S3_BUCKET", "S3_BUCKET"),
        ),
        *_optional("COGNEE_S3_PREFIX", cognee.s3_prefix or ""),
        *_optional("COGNEE_DATA_ROOT", cognee.data_directory or ""),
        *_optional("COGNEE_SYSTEM_ROOT", cognee.system_directory or ""),
        f"ENABLE_BACKEND_ACCESS_CONTROL={'true' if cognee.backend_access_control else 'false'}",
        f"GRAPH_DATABASE_PROVIDER={cognee.graph_database_provider}",
        f"COGNEE_PROJECT_ID={self.project.id}",
        f"COGNEE_TENANT_KEY={self.project.tenant_id}",
        # Credential keys are always present, with an empty value when unset.
        f"COGNEE_SERVICE_USER_EMAIL={email}",
        f"DEFAULT_USER_EMAIL={email}",
        f"COGNEE_SERVICE_USER_PASSWORD={password}",
        f"DEFAULT_USER_PASSWORD={password}",
        *_optional(
            "AWS_ACCESS_KEY_ID",
            _env("COGNEE_AWS_ACCESS_KEY_ID", "AWS_ACCESS_KEY_ID", "S3_ACCESS_KEY"),
        ),
        *_optional(
            "AWS_SECRET_ACCESS_KEY",
            _env(
                "COGNEE_AWS_SECRET_ACCESS_KEY",
                "AWS_SECRET_ACCESS_KEY",
                "S3_SECRET_KEY",
            ),
        ),
        *_optional(
            "AWS_ENDPOINT_URL",
            _env("COGNEE_AWS_ENDPOINT_URL", "AWS_ENDPOINT_URL", "S3_ENDPOINT"),
        ),
        # The region is published under both names.
        *_optional("AWS_REGION", aws_region),
        *_optional("AWS_DEFAULT_REGION", aws_region),
        *_optional(
            "LLM_PROVIDER",
            _env("LLM_COGNEE_PROVIDER", "COGNEE_LLM_PROVIDER", "LLM_PROVIDER"),
        ),
        *_optional(
            "LLM_MODEL",
            _env("LLM_COGNEE_MODEL", "COGNEE_LLM_MODEL", "LITELLM_MODEL"),
        ),
        *_optional(
            "LLM_API_KEY",
            _env("LLM_COGNEE_API_KEY", "COGNEE_LLM_API_KEY", "LLM_API_KEY"),
        ),
        *_optional(
            "COGNEE_SERVICE_URL",
            cognee.service_url or _env("COGNEE_SERVICE_URL", "COGNEE_API_URL"),
        ),
    ]

    (service_dir / ".env").write_text("\n".join(lines) + "\n", encoding="utf-8")
def _provision_cognee_service_account(self, project_dir: Path) -> dict[str, str]:
    """Ensure the hosted Cognee service has a user/tenant/dataset for this project.

    Args:
        project_dir: Project root. Not used by the provisioning calls.

    Returns:
        Dictionary with status info:
        ``{"status": "success|error|skipped", "message": "..."}``. A success
        result also carries ``user``, ``tenant``, ``dataset`` and
        ``dataset_id``; an error result from the service calls also carries
        ``error``. Failures are reported through the result, not raised.
    """
    # Lazy import to avoid circular dependency
    try:
        from fuzzforge_ai.cognee_api_client import CogneeAPIClient
    except ImportError:
        return {"status": "skipped", "message": "CogneeAPIClient not available"}

    if self.cognee.mode != "service":
        return {"status": "skipped", "message": "Cognee mode is not 'service'"}

    # The environment takes precedence over the stored project setting.
    service_url = os.getenv("COGNEE_SERVICE_URL") or self.cognee.service_url
    if not service_url:
        return {"status": "skipped", "message": "No service URL configured"}

    email = self.cognee.service_user_email
    password = self.cognee.service_user_password
    if not email or not password:
        return {"status": "error", "message": "Missing service credentials"}

    tenant_name = self.project.tenant_id or f"fuzzforge_project_{self.project.id}"
    dataset_name = f"{self.project.name}_codebase"

    async def _bootstrap() -> dict[str, str]:
        client = CogneeAPIClient(service_url)
        try:
            profile = await client.ensure_user(email, password, tenant_name)
            dataset_info = await client.ensure_dataset(dataset_name)
            return {
                "status": "success",
                "message": "Provisioned user/tenant/dataset on Cognee service",
                "user": profile.get("email"),
                "tenant": tenant_name,
                "dataset": dataset_name,
                "dataset_id": dataset_info.get("id"),
            }
        except Exception as exc:
            return {
                "status": "error",
                "message": f"Failed to provision Cognee service: {exc}",
                "error": str(exc),
            }
        finally:
            await client.close()

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        loop_running = False
    else:
        loop_running = True

    try:
        if not loop_running:
            return asyncio.run(_bootstrap())

        # Neither asyncio.run() nor run_until_complete() can start on a thread
        # whose loop is already running, so drive the coroutine on a
        # short-lived worker thread that gets its own loop. The calling thread
        # blocks until provisioning finishes, as in the synchronous path.
        from concurrent.futures import ThreadPoolExecutor

        with ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(lambda: asyncio.run(_bootstrap())).result()
    except Exception as exc:
        return {
            "status": "error",
            "message": f"Failed to run bootstrap: {exc}",
            "error": str(exc),
        }
def get_api_url(self) -> str:
    """Get API URL with environment variable override.

    ``FUZZFORGE_API_URL`` wins when it holds a non-empty value. An unset or
    blank variable falls back to ``self.project.api_url``, the same way the
    other environment overrides in this module treat empty values.
    """
    return os.getenv("FUZZFORGE_API_URL") or self.project.api_url
@@ -182,6 +430,15 @@ class FuzzForgeConfig(BaseModel):
"tenant_id": self.project.tenant_id or "fuzzforge_tenant",
"data_directory": self.cognee.data_directory,
"system_directory": self.cognee.system_directory,
"project_dir": str(project_dir),
"cognee_mode": self.cognee.mode,
"cognee_storage_backend": self.cognee.storage_backend,
"cognee_s3_bucket": self.cognee.s3_bucket,
"cognee_s3_prefix": self.cognee.s3_prefix,
"cognee_service_user_email": self.cognee.service_user_email,
"cognee_service_user_password": self.cognee.service_user_password,
"cognee_service_env_dir": self.cognee.service_env_dir,
"cognee_service_url": self.cognee.service_url,
}
def get_cognee_config(self, project_dir: Path) -> Dict[str, Any]:
@@ -249,6 +506,28 @@ def ensure_project_config(
return config
def provision_cognee_service_for_project(
    project_dir: Optional[Path] = None,
) -> dict[str, str]:
    """Provision user/tenant/dataset on the Cognee service for this project.

    Args:
        project_dir: Project directory path. Defaults to the current working
            directory when omitted.

    Returns:
        Dictionary with provisioning status. An ``error`` status is returned
        when no project configuration can be loaded for the directory.
    """
    root = Path(project_dir) if project_dir else Path.cwd()
    project_config = get_project_config(root)
    if project_config is not None:
        return project_config._provision_cognee_service_account(root)
    return {
        "status": "error",
        "message": "Project not initialized. Run 'ff init project' first.",
    }
def get_global_config() -> FuzzForgeConfig:
"""Get global user configuration."""
home = Path.home()
@@ -320,6 +599,53 @@ class ProjectConfigManager:
except Exception: # pragma: no cover - best effort fallback
pass
def _env(*names: str, default: str | None = None) -> str | None:
    """Return the first non-empty environment value among *names*, else *default*."""
    candidates = (os.getenv(candidate) for candidate in names)
    return next((found for found in candidates if found), default)
storage_mode = cognee.get("mode", "embedded")
os.environ["COGNEE_STORAGE_MODE"] = storage_mode
storage_backend = cognee.get("storage_backend", "filesystem")
os.environ["STORAGE_BACKEND"] = storage_backend
if storage_backend == "s3":
bucket = cognee.get("s3_bucket") or _env("COGNEE_S3_BUCKET", "S3_BUCKET")
if bucket:
os.environ["STORAGE_BUCKET_NAME"] = bucket
prefix = cognee.get("s3_prefix") or _env("COGNEE_S3_PREFIX")
if prefix:
os.environ["COGNEE_S3_PREFIX"] = prefix
aws_key = _env("COGNEE_AWS_ACCESS_KEY_ID", "AWS_ACCESS_KEY_ID", "S3_ACCESS_KEY")
if aws_key:
os.environ["AWS_ACCESS_KEY_ID"] = aws_key
aws_secret = _env(
"COGNEE_AWS_SECRET_ACCESS_KEY",
"AWS_SECRET_ACCESS_KEY",
"S3_SECRET_KEY",
)
if aws_secret:
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret
aws_endpoint = _env(
"COGNEE_AWS_ENDPOINT_URL",
"AWS_ENDPOINT_URL",
"S3_ENDPOINT",
)
if aws_endpoint:
os.environ["AWS_ENDPOINT_URL"] = aws_endpoint
aws_region = _env("COGNEE_AWS_REGION", "AWS_REGION", "S3_REGION")
if aws_region:
os.environ["AWS_REGION"] = aws_region
os.environ["AWS_DEFAULT_REGION"] = aws_region
backend_access = "true" if cognee.get("backend_access_control", True) else "false"
os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = backend_access
os.environ["GRAPH_DATABASE_PROVIDER"] = cognee.get("graph_database_provider", "kuzu")
@@ -335,14 +661,29 @@ class ProjectConfigManager:
os.environ["COGNEE_USER_ID"] = tenant_id
os.environ["COGNEE_TENANT_ID"] = tenant_id
os.environ["COGNEE_PROJECT_ID"] = cognee.get("project_id", self._config.project.id)
service_user_email = cognee.get("service_user_email")
if service_user_email:
os.environ["COGNEE_SERVICE_USER_EMAIL"] = service_user_email
os.environ["DEFAULT_USER_EMAIL"] = service_user_email
service_password = _env(
"COGNEE_SERVICE_USER_PASSWORD",
"DEFAULT_USER_PASSWORD",
default=self._config.cognee.service_user_password,
)
if service_password:
os.environ["COGNEE_SERVICE_USER_PASSWORD"] = service_password
os.environ["DEFAULT_USER_PASSWORD"] = service_password
if cognee.get("service_env_dir"):
os.environ["COGNEE_SERVICE_ENV_PATH"] = cognee["service_env_dir"]
if cognee.get("service_url"):
os.environ["COGNEE_SERVICE_URL"] = cognee["service_url"]
if os.getenv("COGNEE_SERVICE_PORT"):
os.environ["COGNEE_SERVICE_PORT"] = os.getenv("COGNEE_SERVICE_PORT")
# Configure LLM provider defaults for Cognee. Values prefixed with COGNEE_
# take precedence so users can segregate credentials.
def _env(*names: str, default: str | None = None) -> str | None:
    """Look up *names* in order and return the first non-empty value, else *default*."""
    for value in map(os.getenv, names):
        if not value:
            continue
        return value
    return default
provider = _env(
"LLM_COGNEE_PROVIDER",