# Files
# sriphat-dataplatform/03-apiservice/app/services/airflow_client.py
#
# 153 lines
# 4.5 KiB
# Python

"""
Airflow API Client for triggering DAGs
"""
import logging
from datetime import datetime, timezone
from typing import Dict, Any, Optional
from urllib.parse import quote

import httpx

from app.core.config import settings

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
class AirflowClient:
    """Client for interacting with the Airflow REST API.

    The base URL and the optional bearer token are read from ``settings``
    when the client is constructed. Every request opens a short-lived
    ``httpx.AsyncClient`` and sends the headers prepared in ``__init__``.
    """

    def __init__(self, *, timeout: float = 30.0):
        """Read connection settings and prepare the default request headers.

        Args:
            timeout: Timeout in seconds passed to ``httpx.AsyncClient`` for
                every request made by this client.
        """
        self.api_url = settings.AIRFLOW_API_URL.rstrip('/')
        self.api_token = settings.AIRFLOW_API_TOKEN
        self.timeout = timeout
        self.headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
        }
        if self.api_token:
            self.headers["Authorization"] = f"Bearer {self.api_token}"
        else:
            logger.warning("AIRFLOW_API_TOKEN not set - API calls may fail")

    def _dag_runs_url(self, dag_id: str, dag_run_id: Optional[str] = None) -> str:
        """Build the dagRuns URL, percent-encoding each identifier."""
        # safe="" also encodes "/" so an identifier can never add path
        # segments or a query string to the request URL.
        url = f"{self.api_url}/api/v1/dags/{quote(dag_id, safe='')}/dagRuns"
        if dag_run_id is not None:
            url = f"{url}/{quote(dag_run_id, safe='')}"
        return url

    async def trigger_dag(
        self,
        dag_id: str,
        conf: Dict[str, Any],
        logical_date: Optional[str] = None
    ) -> Dict[str, Any]:
        """Trigger an Airflow DAG run.

        Args:
            dag_id: DAG identifier.
            conf: Configuration to pass to the DAG (JSON serializable dict).
            logical_date: Optional logical date for the DAG run (ISO format).
                When omitted or empty, the current UTC time is sent.

        Returns:
            The decoded JSON body of the response; its ``dag_run_id`` entry
            is logged on success.

        Raises:
            httpx.HTTPError: If the request fails or the response has an
                error status. Any other exception is logged and re-raised.
        """
        url = self._dag_runs_url(dag_id)

        if not logical_date:
            # Aware UTC clock instead of the deprecated datetime.utcnow();
            # the "+00:00" offset is rewritten to "Z" so the string sent to
            # Airflow keeps its previous format.
            logical_date = (
                datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
            )
        payload = {
            "conf": conf,
            "logical_date": logical_date,
        }

        logger.info("Triggering Airflow DAG: %s", dag_id)
        logger.debug("Payload: %s", payload)

        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.post(
                    url,
                    json=payload,
                    headers=self.headers
                )
                response.raise_for_status()
                result = response.json()
                logger.info(
                    "DAG triggered successfully: %s", result.get('dag_run_id')
                )
                return result
        except httpx.HTTPStatusError as e:
            logger.error(
                "HTTP error triggering DAG %s: %s - %s",
                dag_id,
                e.response.status_code,
                e.response.text,
            )
            raise
        except httpx.RequestError as e:
            logger.error("Request error triggering DAG %s: %s", dag_id, e)
            raise
        except Exception as e:
            # Log-and-re-raise boundary: nothing is swallowed here.
            logger.error("Unexpected error triggering DAG %s: %s", dag_id, e)
            raise

    async def get_dag_run_status(
        self,
        dag_id: str,
        dag_run_id: str
    ) -> Dict[str, Any]:
        """Get the status of a DAG run.

        Args:
            dag_id: DAG identifier.
            dag_run_id: DAG run identifier.

        Returns:
            The decoded JSON body of the response.

        Raises:
            httpx.HTTPError: If the request fails or the response has an
                error status.
        """
        url = self._dag_runs_url(dag_id, dag_run_id)

        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.get(url, headers=self.headers)
                response.raise_for_status()
                return response.json()
        except httpx.HTTPError as e:
            logger.error("Error getting DAG run status: %s", e)
            raise

    async def trigger_finance_dag(
        self,
        upload_id: str,
        filepath: str,
        filename: str,
        uploaded_by: str,
        description: Optional[str] = None
    ) -> Dict[str, Any]:
        """Trigger the finance Excel processing DAG.

        The DAG id is taken from ``settings.AIRFLOW_DAG_ID_FINANCE``.

        Args:
            upload_id: Unique upload identifier.
            filepath: Full path to the uploaded file.
            filename: Original filename.
            uploaded_by: Username who uploaded the file.
            description: Optional description; sent as "" when not given.

        Returns:
            Whatever ``trigger_dag`` returns for the finance DAG.
        """
        # Aware clock, then tzinfo dropped so "triggered_at" keeps the same
        # offset-less ISO format the original utcnow() call produced.
        triggered_at = (
            datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        )
        conf = {
            "upload_id": upload_id,
            "filepath": filepath,
            "filename": filename,
            "uploaded_by": uploaded_by,
            "description": description or "",
            "triggered_at": triggered_at,
        }

        return await self.trigger_dag(
            dag_id=settings.AIRFLOW_DAG_ID_FINANCE,
            conf=conf
        )
# Shared module-level instance. It is constructed at import time, so the
# settings read in AirflowClient.__init__ must be available on import.
airflow_client = AirflowClient()