update configuration docker setup for data platform

This commit is contained in:
jigoong
2026-05-07 17:57:42 +07:00
parent ce949dcc8f
commit 1dba772e62
53 changed files with 6732 additions and 24 deletions

View File

@@ -0,0 +1 @@
# Routes package

View File

@@ -0,0 +1,156 @@
"""
User and Role management endpoints (Admin only)
Note: This manages local user records synced from Keycloak
API endpoints (/api/v1/*) are NOT affected and continue using API Key authentication
"""
from typing import List
from fastapi import APIRouter, Depends, HTTPException, Request
from sqlalchemy.orm import Session
from pydantic import BaseModel
from datetime import datetime
from app.db.session import get_db
from app.models.user import User, Role
from app.security.permissions import require_role, Roles
import logging
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Every route declared on this router is served under the /admin/users prefix.
router = APIRouter(prefix="/admin/users", tags=["admin-users"])
# Pydantic schemas
class RoleSchema(BaseModel):
    """Serialized view of a role record returned by the admin endpoints."""

    # ``model_config`` replaces the nested ``Config`` class, which Pydantic v2
    # deprecates. ``from_attributes`` lets the schema be populated from object
    # attributes (the ORM instances the routes return) rather than dicts only.
    model_config = {"from_attributes": True}

    id: int
    name: str
    description: str | None = None
class UserSchema(BaseModel):
    """Serialized view of a local user record, including its assigned roles."""

    # ``model_config`` replaces the nested ``Config`` class, which Pydantic v2
    # deprecates. ``from_attributes`` lets the schema be populated from object
    # attributes (the ORM instances the routes return) rather than dicts only.
    model_config = {"from_attributes": True}

    id: int
    keycloak_id: str
    username: str
    email: str | None = None
    full_name: str | None = None
    is_active: bool
    # Defaults to an empty list when the source object carries no roles.
    roles: List[RoleSchema] = []
    last_login: datetime | None = None
    created_at: datetime
class UserCreateSchema(BaseModel):
    """Request body accepted when creating a local user record."""

    # Required fields
    keycloak_id: str
    username: str

    # Optional profile fields; omitted values default to None
    email: str | None = None
    full_name: str | None = None
class UserUpdateSchema(BaseModel):
    """Request body for a partial user update; every field is optional."""

    # A field left as None is skipped by the update handler.
    email: str | None = None
    full_name: str | None = None
    is_active: bool | None = None

    # When provided, the handler replaces the user's roles with these role IDs.
    role_ids: List[int] | None = None
@router.get("/", response_model=List[UserSchema])
async def list_users(
    db: Session = Depends(get_db),
    current_user: dict = Depends(require_role(Roles.ADMIN)),
):
    """List all users (Admin only)"""
    # current_user is not read here; the dependency is declared so that
    # require_role(Roles.ADMIN) runs before the handler (presumably rejecting
    # non-admin callers).
    return db.query(User).all()
@router.get("/{user_id}", response_model=UserSchema)
async def get_user(
    user_id: int,
    db: Session = Depends(get_db),
    current_user: dict = Depends(require_role(Roles.ADMIN)),
):
    """Get user by ID (Admin only)"""
    record = db.query(User).filter(User.id == user_id).first()
    if record:
        return record
    # No row matched the requested primary key.
    raise HTTPException(status_code=404, detail="User not found")
@router.post("/", response_model=UserSchema)
async def create_user(
    user_data: UserCreateSchema,
    db: Session = Depends(get_db),
    current_user: dict = Depends(require_role(Roles.ADMIN))
):
    """Create new user record (Admin only)"""
    # Refuse to create a second local record for the same Keycloak account.
    existing = db.query(User).filter(User.keycloak_id == user_data.keycloak_id).first()
    if existing:
        raise HTTPException(status_code=400, detail="User already exists")

    # model_dump() is the Pydantic v2 replacement for the deprecated .dict().
    user = User(**user_data.model_dump())
    db.add(user)
    db.commit()
    # Reload so database-generated values are present in the response.
    db.refresh(user)
    return user
@router.put("/{user_id}", response_model=UserSchema)
async def update_user(
    user_id: int,
    user_data: UserUpdateSchema,
    db: Session = Depends(get_db),
    current_user: dict = Depends(require_role(Roles.ADMIN))
):
    """Update user (Admin only)"""
    user = db.query(User).filter(User.id == user_id).first()
    if not user:
        raise HTTPException(status_code=404, detail="User not found")

    # Resolve the requested roles before touching the user, so that a request
    # naming a non-existent role is rejected without modifying the record
    # (previously unknown IDs were silently dropped).
    new_roles = None
    if user_data.role_ids is not None:
        requested_ids = set(user_data.role_ids)
        new_roles = (
            db.query(Role).filter(Role.id.in_(list(requested_ids))).all()
            if requested_ids
            else []
        )
        unknown_ids = requested_ids - {role.id for role in new_roles}
        if unknown_ids:
            raise HTTPException(
                status_code=400,
                detail=f"Unknown role IDs: {sorted(unknown_ids)}"
            )

    # Update fields; None means "leave unchanged"
    if user_data.email is not None:
        user.email = user_data.email
    if user_data.full_name is not None:
        user.full_name = user_data.full_name
    if user_data.is_active is not None:
        user.is_active = user_data.is_active

    # Update roles; an explicit empty list clears all roles
    if new_roles is not None:
        user.roles = new_roles

    db.commit()
    db.refresh(user)
    return user
@router.delete("/{user_id}")
async def delete_user(
    user_id: int,
    db: Session = Depends(get_db),
    current_user: dict = Depends(require_role(Roles.ADMIN)),
):
    """Delete user (Admin only)"""
    target = db.query(User).filter(User.id == user_id).first()
    if target:
        db.delete(target)
        db.commit()
        return {"message": "User deleted successfully"}
    # Nothing to delete for this ID.
    raise HTTPException(status_code=404, detail="User not found")
@router.get("/roles/", response_model=List[RoleSchema])
async def list_roles(
    db: Session = Depends(get_db),
    current_user: dict = Depends(require_role(Roles.ADMIN)),
):
    """List all roles (Admin only)"""
    # NOTE(review): this route is registered after "/{user_id}" on the same
    # router; a request without the trailing slash ("/roles") would presumably
    # be matched by that route first - confirm clients always use "/roles/".
    return db.query(Role).all()

View File

@@ -0,0 +1,252 @@
"""
Authentication routes for Keycloak web login
Note: These routes are ONLY for web UI authentication
API endpoints use API Key authentication separately
"""
from fastapi import APIRouter, Request, HTTPException, Query
from fastapi.responses import RedirectResponse
from app.security.keycloak_auth import (
get_keycloak_client,
get_login_url,
get_logout_url,
get_current_user
)
from app.core.config import settings
import logging
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Every route declared on this router is served under the /auth prefix.
router = APIRouter(prefix="/auth", tags=["authentication"])
@router.get("/login")
async def login(request: Request, redirect_to: str = Query(default="/")):
    """
    Redirect to Keycloak login page

    Args:
        redirect_to: Path to redirect after successful login
    """
    # redirect_to comes straight from the query string. Accept only a local
    # absolute path so the endpoint cannot be used to bounce users to another
    # site: "//host" and backslash/control-character variants are treated by
    # browsers as external URLs. Anything else falls back to the root path.
    is_local_path = (
        redirect_to.startswith("/")
        and not redirect_to.startswith("//")
        and not any(ch in redirect_to for ch in "\\\r\n\t")
    )
    if not is_local_path:
        logger.warning("Ignoring unsafe redirect_to value: %r", redirect_to)
        redirect_to = "/"

    # Check if already logged in
    user = get_current_user(request)
    if user:
        return RedirectResponse(url=redirect_to)

    # Generate Keycloak login URL
    login_url = get_login_url(redirect_to)
    return RedirectResponse(url=login_url)
@router.get("/callback")
async def auth_callback(
    request: Request,
    code: str = Query(...),
    state: str = Query(default="/")
):
    """
    Handle Keycloak callback after login

    Exchanges the authorization code for tokens, reads the user's identity and
    roles from the access token, stores them in the session and redirects to
    the originally requested path.

    Args:
        code: Authorization code from Keycloak
        state: Original redirect path
    """
    try:
        if settings.DEBUG_AUTH:
            # Session cookies and bearer credentials must never reach the log.
            sensitive_headers = {"authorization", "cookie"}
            loggable_headers = {
                name: ("<redacted>" if name.lower() in sensitive_headers else value)
                for name, value in request.headers.items()
            }
            logger.info("=" * 80)
            logger.info("AUTHENTICATION CALLBACK RECEIVED")
            logger.info(f"Authorization code: {code[:20]}...{code[-20:] if len(code) > 40 else code}")
            logger.info(f"State (redirect_to): {state}")
            logger.info(f"Request URL: {request.url}")
            logger.info(f"Request headers: {loggable_headers}")

        keycloak_client = get_keycloak_client()

        if settings.DEBUG_AUTH:
            logger.info("-" * 80)
            logger.info("EXCHANGING CODE FOR TOKENS")
            logger.info("Grant type: authorization_code")
            logger.info(f"Code: {code[:20]}...")
            logger.info(f"Redirect URI: {settings.KEYCLOAK_REDIRECT_URI}")

        # Exchange authorization code for tokens.
        # NOTE(review): this is a synchronous call inside an async handler;
        # if it performs network I/O it blocks the event loop - confirm.
        token_response = keycloak_client.token(
            grant_type="authorization_code",
            code=code,
            redirect_uri=settings.KEYCLOAK_REDIRECT_URI
        )

        if settings.DEBUG_AUTH:
            logger.info("TOKEN RESPONSE RECEIVED")
            logger.info(f"Access token: {'*' * 20}...{token_response.get('access_token', '')[-20:] if token_response.get('access_token') else 'NONE'}")
            logger.info(f"Refresh token: {'Present' if token_response.get('refresh_token') else 'None'}")
            logger.info(f"Token type: {token_response.get('token_type', 'N/A')}")
            logger.info(f"Expires in: {token_response.get('expires_in', 'N/A')} seconds")

        # Get user information from token
        access_token = token_response.get("access_token")
        if not access_token:
            logger.error("No access token in response!")
            if settings.DEBUG_AUTH:
                logger.error(f"Full token response: {token_response}")
            raise HTTPException(
                status_code=400,
                detail="No access token received from Keycloak"
            )

        if settings.DEBUG_AUTH:
            logger.info("-" * 80)
            logger.info("FETCHING USER INFO")

        from jose import jwt

        # Signature and audience checks are disabled: the claims are read
        # as-is. NOTE(review): this relies on the token having just been
        # obtained from the token exchange above - confirm that is acceptable.
        userinfo = jwt.decode(access_token, key="", options={"verify_signature": False, "verify_aud": False})
        logger.info(f"Decoded access_token: {userinfo.get('preferred_username')}")

        # Extract roles from token
        roles = []
        # 1. Realm roles (realm-level roles)
        if "realm_access" in userinfo and "roles" in userinfo["realm_access"]:
            roles.extend(userinfo["realm_access"]["roles"])
        # 2. Client roles (roles specific to this application's client)
        if "resource_access" in userinfo and settings.KEYCLOAK_CLIENT_ID in userinfo["resource_access"]:
            client_roles = userinfo["resource_access"][settings.KEYCLOAK_CLIENT_ID].get("roles", [])
            roles.extend(client_roles)

        # Filter to only include our application roles
        user_roles = [r for r in roles if r in ["admin", "operation"]]

        if settings.DEBUG_AUTH:
            logger.info("USER INFO RECEIVED")
            logger.info(f"Username: {userinfo.get('preferred_username')}")
            logger.info(f"Email: {userinfo.get('email')}")
            logger.info(f"Name: {userinfo.get('name')}")
            logger.info(f"Sub (User ID): {userinfo.get('sub')}")
            logger.info(f"All roles from token: {roles}")
            logger.info(f"Filtered user roles: {user_roles}")
            logger.info(f"Full userinfo keys: {list(userinfo.keys())}")

        # Store user info, roles, and tokens in session
        user_session_data = {
            "username": userinfo.get("preferred_username"),
            "email": userinfo.get("email"),
            "name": userinfo.get("name", userinfo.get("preferred_username")),
            "sub": userinfo.get("sub"),  # User ID
            "roles": user_roles,  # User roles
            "access_token": access_token,
            "refresh_token": token_response.get("refresh_token")
        }
        request.session["user"] = user_session_data

        if settings.DEBUG_AUTH:
            logger.info("-" * 80)
            logger.info("SESSION UPDATED")
            logger.info(f"Session user data: {dict((k, v) for k, v in user_session_data.items() if k not in ['access_token', 'refresh_token'])}")

        logger.info(f"User {userinfo.get('preferred_username')} logged in successfully")

        # Redirect to original destination. `state` is a query parameter and so
        # can be set by whoever crafts the callback URL: accept only a local
        # absolute path ("//host" and backslash/control-character variants are
        # treated by browsers as external URLs) and otherwise fall back to "/".
        redirect_url = state if state else "/"
        is_local_path = (
            redirect_url.startswith("/")
            and not redirect_url.startswith("//")
            and not any(ch in redirect_url for ch in "\\\r\n\t")
        )
        if not is_local_path:
            logger.warning("Ignoring unsafe post-login redirect target: %r", redirect_url)
            redirect_url = "/"

        # Ensure redirect URL starts with root_path if set
        if settings.ROOT_PATH and not redirect_url.startswith(settings.ROOT_PATH):
            redirect_url = f"{settings.ROOT_PATH}{redirect_url}"

        if settings.DEBUG_AUTH:
            logger.info(f"Redirecting to: {redirect_url}")
            logger.info("=" * 80)

        return RedirectResponse(url=redirect_url, status_code=302)
    except HTTPException:
        # Already a well-formed HTTP error (e.g. the missing-access-token case
        # above); re-raise it untouched instead of wrapping its text again.
        raise
    except Exception as e:
        logger.error(f"Authentication callback failed: {e}")
        if settings.DEBUG_AUTH:
            import traceback
            logger.error("FULL TRACEBACK:")
            logger.error(traceback.format_exc())
            logger.error("=" * 80)
        raise HTTPException(
            status_code=400,
            detail=f"Authentication failed: {str(e)}"
        ) from e
@router.get("/logout")
async def logout(request: Request):
    """
    Logout user and clear session
    Redirects to Keycloak logout page
    """
    # Look the user up before the session is wiped so the log line can name them.
    session_user = get_current_user(request)
    request.session.clear()
    if session_user:
        logger.info(f"User {session_user.get('username')} logged out")

    # After logout, land on the application root (below ROOT_PATH when one is configured).
    if settings.ROOT_PATH:
        landing_path = f"{settings.ROOT_PATH}/"
    else:
        landing_path = "/"
    return RedirectResponse(url=get_logout_url(landing_path))
@router.get("/user")
async def get_user_info(request: Request):
    """
    Get current authenticated user information
    Returns 401 if not authenticated
    """
    session_user = get_current_user(request)
    if not session_user:
        raise HTTPException(status_code=401, detail="Not authenticated")

    # Expose only these profile fields; tokens kept alongside them are left out.
    public_fields = ("username", "email", "name", "sub")
    return {field: session_user.get(field) for field in public_fields}
@router.get("/status")
async def auth_status(request: Request):
    """
    Check authentication status
    Returns whether user is logged in
    """
    session_user = get_current_user(request)

    # Only a minimal identity summary is returned, and only for a non-empty user.
    summary = None
    if session_user:
        summary = {
            "username": session_user.get("username"),
            "name": session_user.get("name"),
        }

    return {"authenticated": session_user is not None, "user": summary}

View File

@@ -0,0 +1,305 @@
"""
Web page routes for the application
"""
import os
from datetime import datetime
from pathlib import Path
from typing import List
from fastapi import APIRouter, Request, UploadFile, File, Form, HTTPException, Depends
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel
from sqlalchemy.orm import Session
import asyncio
import logging
from app.core.config import settings
from app.security.permissions import require_role, Roles
from app.db.session import get_db
from app.models.upload import UploadHistory
from app.services.airflow_client import airflow_client
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# No prefix: the paths declared on the routes below are used as written.
router = APIRouter()
# Setup templates
# The "templates" directory is a sibling of the directory containing this file.
templates_dir = Path(__file__).parent.parent / "templates"
templates = Jinja2Templates(directory=str(templates_dir))
# Upload directory
# NOTE(review): the directory is created when this module is imported, so the
# import raises if /data/uploads cannot be created (e.g. insufficient permissions).
UPLOAD_DIR = Path("/data/uploads")
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
class UploadRecordSchema(BaseModel):
    """Serialized view of an upload history record."""

    # ``model_config`` replaces the nested ``Config`` class, which Pydantic v2
    # deprecates. ``from_attributes`` lets the schema be populated from object
    # attributes (e.g. ORM instances) rather than dicts only.
    model_config = {"from_attributes": True}

    id: int
    upload_id: str
    filename: str
    filepath: str
    description: str | None = None
    status: str
    job_id: str | None = None
    logs: str | None = None
    uploaded_by: str | None = None
    uploaded_at: datetime
    updated_at: datetime | None = None
@router.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Landing page with navigation menu"""
    # "user" is None in the template when the session holds no user entry.
    context = {
        "request": request,
        "root_path": settings.ROOT_PATH,
        "user": request.session.get("user"),
    }
    return templates.TemplateResponse("index.html", context)
@router.get("/data-management/finance", response_class=HTMLResponse)
async def finance_page(request: Request):
    """Finance Excel upload page - requires operation or admin role"""
    # NOTE(review): no role dependency is declared on this route (compare
    # admin_users_page, which uses require_role); confirm the role requirement
    # stated above is enforced somewhere else, e.g. in middleware.
    context = {
        "request": request,
        "root_path": settings.ROOT_PATH,
        "user": request.session.get("user"),
    }
    return templates.TemplateResponse("data_management_finance.html", context)
@router.get("/admin/users", response_class=HTMLResponse)
async def admin_users_page(
    request: Request,
    current_user: dict = Depends(require_role(Roles.ADMIN)),
):
    """User management page - Admin only"""
    # The template receives whatever the role dependency returned as "user".
    context = {
        "request": request,
        "root_path": settings.ROOT_PATH,
        "user": current_user,
    }
    return templates.TemplateResponse("admin_users.html", context)
@router.post("/data-management/finance/upload")
async def upload_finance_file(
    request: Request,
    file: UploadFile = File(...),
    description: str = Form(None),
    db: Session = Depends(get_db)
):
    """
    Handle finance Excel file upload
    - Saves file to /data/uploads/
    - Stores upload record in database
    - Triggers the finance Airflow DAG, retrying on failure
    - Returns a summary of the upload and of the trigger outcome
    """
    # NOTE(review): no role dependency is declared here; requests without a
    # session user are recorded as "anonymous" - confirm this is intended.

    # The filename is supplied by the client: it may be missing and may contain
    # directory components. Keep only the final path component so it can never
    # steer the write outside UPLOAD_DIR.
    original_name = Path((file.filename or "").replace("\\", "/")).name

    # Validate file type (case-insensitive, so "REPORT.XLSX" is accepted too)
    if not original_name.lower().endswith(('.xlsx', '.xls')):
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Only .xlsx and .xls files are allowed."
        )

    # Generate unique filename. Microseconds are included so that two uploads
    # within the same second get distinct upload IDs and file paths.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    safe_filename = original_name.replace(" ", "_")
    unique_filename = f"{timestamp}_{safe_filename}"
    filepath = UPLOAD_DIR / unique_filename

    # Save file
    try:
        content = await file.read()
        with open(filepath, "wb") as f:
            f.write(content)
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to save file: {str(e)}"
        ) from e

    # Get username from session
    user = request.session.get("user")
    username = user.get("username") if user else "anonymous"

    # Create upload record in database
    upload_id = f"upload_{timestamp}"
    upload_record = UploadHistory(
        upload_id=upload_id,
        filename=original_name,
        filepath=str(filepath),
        description=description,
        status="pending",
        uploaded_by=username
    )
    db.add(upload_record)
    db.commit()
    db.refresh(upload_record)

    # Trigger Airflow DAG with retry logic
    airflow_triggered = False
    dag_run_id = None
    error_msg = None
    max_retries = 3
    retry_delay = 10  # seconds

    for attempt in range(max_retries):
        try:
            logger.info(f"Triggering Airflow DAG (attempt {attempt + 1}/{max_retries})")
            result = await airflow_client.trigger_finance_dag(
                upload_id=upload_id,
                filepath=str(filepath),
                filename=original_name,
                uploaded_by=username,
                description=description
            )
            dag_run_id = result.get("dag_run_id")
            airflow_triggered = True

            # Update upload record with Airflow info
            upload_record.airflow_dag_run_id = dag_run_id
            upload_record.airflow_state = result.get("state", "queued")
            upload_record.status = "processing"
            db.commit()

            logger.info(f"Airflow DAG triggered successfully: {dag_run_id}")
            break
        except Exception as e:
            error_msg = str(e)
            logger.error(f"Failed to trigger Airflow (attempt {attempt + 1}/{max_retries}): {error_msg}")
            if attempt < max_retries - 1:
                logger.info(f"Retrying in {retry_delay} seconds...")
                await asyncio.sleep(retry_delay)
            else:
                # Last attempt failed: persist the failure on the upload record.
                logger.error(f"All {max_retries} attempts failed to trigger Airflow")
                upload_record.status = "error"
                upload_record.error_message = f"Failed to trigger Airflow after {max_retries} attempts: {error_msg}"
                db.commit()

    # "success" reports that the file was stored; the Airflow outcome is
    # reported separately through airflow_triggered / error.
    return {
        "success": True,
        "message": f"File '{original_name}' uploaded successfully",
        "upload_id": upload_id,
        "filename": unique_filename,
        "airflow_triggered": airflow_triggered,
        "dag_run_id": dag_run_id,
        "error": error_msg if not airflow_triggered else None
    }
@router.get("/data-management/finance/uploads")
async def get_uploads(db: Session = Depends(get_db)):
    """Get list of all uploads with their status"""
    # Newest uploads first.
    records = db.query(UploadHistory).order_by(UploadHistory.uploaded_at.desc()).all()

    # Build plain dicts so the result serializes directly to JSON.
    payload = []
    for record in records:
        started_at = record.processing_started_at
        completed_at = record.processing_completed_at
        payload.append(
            {
                "id": record.upload_id,
                "filename": record.filename,
                "filepath": record.filepath,
                "uploaded_at": record.uploaded_at.isoformat(),
                "description": record.description,
                "status": record.status,
                "job_id": record.job_id,
                "logs": record.logs,
                "uploaded_by": record.uploaded_by,
                "airflow_dag_run_id": record.airflow_dag_run_id,
                "airflow_state": record.airflow_state,
                # The processing timestamps may be unset; emit None in that case.
                "processing_started_at": started_at.isoformat() if started_at else None,
                "processing_completed_at": completed_at.isoformat() if completed_at else None,
                "error_message": record.error_message,
            }
        )
    return payload
@router.get("/data-management/finance/uploads/{upload_id}")
async def get_upload_status(upload_id: str, db: Session = Depends(get_db)):
    """Get status of a specific upload"""
    record = db.query(UploadHistory).filter(UploadHistory.upload_id == upload_id).first()
    if not record:
        raise HTTPException(status_code=404, detail="Upload not found")

    # The processing timestamps may be unset; emit None in that case.
    started_at = None
    if record.processing_started_at:
        started_at = record.processing_started_at.isoformat()
    completed_at = None
    if record.processing_completed_at:
        completed_at = record.processing_completed_at.isoformat()

    return {
        "id": record.upload_id,
        "filename": record.filename,
        "filepath": record.filepath,
        "uploaded_at": record.uploaded_at.isoformat(),
        "description": record.description,
        "status": record.status,
        "job_id": record.job_id,
        "logs": record.logs,
        "uploaded_by": record.uploaded_by,
        "airflow_dag_run_id": record.airflow_dag_run_id,
        "airflow_state": record.airflow_state,
        "processing_started_at": started_at,
        "processing_completed_at": completed_at,
        "error_message": record.error_message,
    }
# Placeholder for Airflow integration
async def trigger_airflow_job(filepath: str, upload_id: str) -> str:
    """
    Placeholder for triggering an Airflow DAG run for an uploaded file.

    Args:
        filepath: Path to the uploaded file
        upload_id: Unique upload identifier

    Returns:
        job_id: Airflow job/run ID (once implemented)

    Raises:
        NotImplementedError: always; the function body is not implemented yet.
    """
    # TODO: Implement Airflow API call once the DAG ID and the Airflow API
    # endpoint are known. Sketch of the intended implementation:
    #   - POST to <airflow>/api/v1/dags/{dag_id}/dagRuns with an async HTTP client
    #   - send {"conf": {"filepath": filepath, "upload_id": upload_id}} as JSON
    #   - authenticate with proper credentials (never hard-coded defaults)
    #   - raise on a non-success status, then return the response's "dag_run_id"
    pending_reason = "Airflow integration pending DAG ID and endpoint"
    raise NotImplementedError(pending_reason)