update configuration docker setup for data platform

This commit is contained in:
jigoong
2026-05-07 17:57:42 +07:00
parent ce949dcc8f
commit 1dba772e62
53 changed files with 6732 additions and 24 deletions

View File

@@ -0,0 +1,152 @@
"""
Airflow API Client for triggering DAGs
"""
import httpx
import logging
from typing import Dict, Any, Optional
from datetime import datetime
from app.core.config import settings
logger = logging.getLogger(__name__)
class AirflowClient:
"""Client for interacting with Airflow REST API"""
def __init__(self):
self.api_url = settings.AIRFLOW_API_URL.rstrip('/')
self.api_token = settings.AIRFLOW_API_TOKEN
self.headers = {
"Content-Type": "application/json",
"Accept": "application/json"
}
if self.api_token:
self.headers["Authorization"] = f"Bearer {self.api_token}"
else:
logger.warning("AIRFLOW_API_TOKEN not set - API calls may fail")
async def trigger_dag(
self,
dag_id: str,
conf: Dict[str, Any],
logical_date: Optional[str] = None
) -> Dict[str, Any]:
"""
Trigger an Airflow DAG run
Args:
dag_id: DAG identifier
conf: Configuration to pass to the DAG (JSON serializable dict)
logical_date: Optional logical date for the DAG run (ISO format)
Returns:
Dict containing dag_run_id and other metadata
Raises:
httpx.HTTPError: If the API call fails
"""
url = f"{self.api_url}/api/v1/dags/{dag_id}/dagRuns"
payload = {
"conf": conf
}
if logical_date:
payload["logical_date"] = logical_date
else:
payload["logical_date"] = datetime.utcnow().isoformat() + "Z"
logger.info(f"Triggering Airflow DAG: {dag_id}")
logger.debug(f"Payload: {payload}")
try:
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
url,
json=payload,
headers=self.headers
)
response.raise_for_status()
result = response.json()
logger.info(f"DAG triggered successfully: {result.get('dag_run_id')}")
return result
except httpx.HTTPStatusError as e:
logger.error(f"HTTP error triggering DAG {dag_id}: {e.response.status_code} - {e.response.text}")
raise
except httpx.RequestError as e:
logger.error(f"Request error triggering DAG {dag_id}: {str(e)}")
raise
except Exception as e:
logger.error(f"Unexpected error triggering DAG {dag_id}: {str(e)}")
raise
async def get_dag_run_status(
self,
dag_id: str,
dag_run_id: str
) -> Dict[str, Any]:
"""
Get the status of a DAG run
Args:
dag_id: DAG identifier
dag_run_id: DAG run identifier
Returns:
Dict containing DAG run status and metadata
"""
url = f"{self.api_url}/api/v1/dags/{dag_id}/dagRuns/{dag_run_id}"
try:
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.get(url, headers=self.headers)
response.raise_for_status()
return response.json()
except httpx.HTTPError as e:
logger.error(f"Error getting DAG run status: {str(e)}")
raise
async def trigger_finance_dag(
self,
upload_id: str,
filepath: str,
filename: str,
uploaded_by: str,
description: Optional[str] = None
) -> Dict[str, Any]:
"""
Trigger the finance Excel processing DAG
Args:
upload_id: Unique upload identifier
filepath: Full path to the uploaded file
filename: Original filename
uploaded_by: Username who uploaded the file
description: Optional description
Returns:
Dict containing dag_run_id and other metadata
"""
conf = {
"upload_id": upload_id,
"filepath": filepath,
"filename": filename,
"uploaded_by": uploaded_by,
"description": description or "",
"triggered_at": datetime.utcnow().isoformat()
}
return await self.trigger_dag(
dag_id=settings.AIRFLOW_DAG_ID_FINANCE,
conf=conf
)
# Singleton instance
airflow_client = AirflowClient()