# Deploying Your Customer Service Model
Let's deploy a GPT model optimized for customer service interactions. This will take about 15 minutes.
## Quick Model Deployment
### 1. Select the Model
from azure.identity import DefaultAzureCredential
from azure.ai.resources import AIProjectClient
import os
def deploy_customer_service_model():
    """Deploy ``gpt-35-turbo`` under the name ``customer-service-v1``.

    An ``AIProjectClient`` is built from ``DefaultAzureCredential`` plus the
    ``AZURE_SUBSCRIPTION_ID`` and ``AZURE_RESOURCE_GROUP`` environment
    variables, and a fixed model/compute/settings configuration is passed to
    ``client.models.deploy``.

    Returns:
        The value returned by ``client.models.deploy``.

    Raises:
        Exception: Any error is printed with a ``Deployment error:`` prefix
            and then re-raised unchanged.
    """
    try:
        # Authenticate and bind the client to the target subscription.
        azure_credential = DefaultAzureCredential()
        project_client = AIProjectClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=azure_credential,
        )

        # Assemble the configuration section by section.
        model_spec = {
            "name": "gpt-35-turbo",  # or your preferred model
            "version": "0301",
        }
        compute_spec = {
            "instance_type": "Standard_DS3_v2",
            "instance_count": 1,
        }
        generation_settings = {
            "max_tokens": 1000,
            "temperature": 0.7,
            "top_p": 0.95,
        }
        config = {
            "model": model_spec,
            "compute": compute_spec,
            "settings": generation_settings,
        }

        # Submit the deployment and hand the result straight back.
        return project_client.models.deploy(
            model_name=model_spec["name"],
            deployment_name="customer-service-v1",
            configuration=config,
        )
    except Exception as exc:
        print(f"Deployment error: {exc!s}")
        raise
## Deployment Process
### 1. Environment Setup
```python
from azure.identity import DefaultAzureCredential
from azure.ai.resources import AIProjectClient
import os
def setup_deployment_environment():
    """Apply the compute, network, security and monitoring environment.

    An ``AIProjectClient`` is built from ``DefaultAzureCredential`` plus the
    ``AZURE_SUBSCRIPTION_ID`` and ``AZURE_RESOURCE_GROUP`` environment
    variables. The Key Vault id and Log Analytics workspace id are read from
    ``KEY_VAULT_ID`` and ``LOG_ANALYTICS_WORKSPACE_ID``. The assembled
    configuration is passed to ``client.deployments.configure_environment``.

    Returns:
        ``True`` once ``configure_environment`` has returned.

    Raises:
        Exception: Any error is printed and then re-raised unchanged.
    """
    try:
        # Authenticate and bind the client to the target subscription.
        azure_credential = DefaultAzureCredential()
        project_client = AIProjectClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=azure_credential,
        )

        # Build each section separately, then combine them.
        compute_section = {
            "vm_size": "Standard_DS3_v2",
            "min_nodes": 1,
            "max_nodes": 4,
        }
        network_section = {
            "virtual_network": "ai-vnet",
            "subnet": "model-subnet",
            "private_link_enabled": True,
        }
        security_section = {
            "encryption_type": "CustomerManaged",
            "key_vault_id": os.getenv("KEY_VAULT_ID"),
            "network_isolation": True,
        }
        monitoring_section = {
            "workspace_id": os.getenv("LOG_ANALYTICS_WORKSPACE_ID"),
            "metrics_enabled": True,
            "logs_enabled": True,
        }
        env_settings = {
            "compute": compute_section,
            "network": network_section,
            "security": security_section,
            "monitoring": monitoring_section,
        }

        # Push the environment configuration to the service.
        project_client.deployments.configure_environment(
            environment_config=env_settings,
        )
    except Exception as exc:
        print(f"Error setting up deployment environment: {exc!s}")
        raise
    else:
        return True
```
### 2. Deployment Configuration
def configure_model_deployment(model_name: str, version: str):
    """Create a deployment named ``<model_name>-deployment``.

    The deployment uses fixed compute, endpoint and performance settings and
    is created through ``client.models.create_deployment``.

    Args:
        model_name: Model to deploy; also used to derive the deployment name.
        version: Model version forwarded to ``create_deployment``.

    Returns:
        The value returned by ``client.models.create_deployment``.

    Raises:
        Exception: Any error is printed and then re-raised unchanged.
    """
    try:
        # Authenticate and bind the client to the target subscription.
        azure_credential = DefaultAzureCredential()
        project_client = AIProjectClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=azure_credential,
        )

        # Compute sizing with autoscaling bounds.
        compute_section = {
            "instance_type": "Standard_DS3_v2",
            "instance_count": 2,
            "autoscale_enabled": True,
            "min_replicas": 1,
            "max_replicas": 5,
        }
        # Endpoint throughput, concurrency and timeout values.
        endpoint_section = {
            "throughput_limit": 100,
            "max_concurrent_requests": 20,
            "request_timeout": 30,
        }
        # Generation parameters.
        performance_section = {
            "max_tokens": 2000,
            "temperature": 0.7,
            "top_p": 0.95,
            "frequency_penalty": 0.0,
            "presence_penalty": 0.0,
        }
        config = {
            "compute": compute_section,
            "endpoint": endpoint_section,
            "performance": performance_section,
        }

        target_name = f"{model_name}-deployment"
        return project_client.models.create_deployment(
            model_name=model_name,
            version=version,
            deployment_name=target_name,
            configuration=config,
        )
    except Exception as exc:
        print(f"Error configuring model deployment: {exc!s}")
        raise
### 3. Deployment Methods
# Strategy names understood by deploy_model_with_strategy.
_DEPLOYMENT_STRATEGIES = ("single", "multi", "ab", "staged")


def deploy_model_with_strategy(model_name: str, version: str, strategy: str):
    """Deploy a model using one of the supported rollout strategies.

    Supported values for ``strategy``:

    * ``"single"`` - one deployment named ``<model_name>-prod`` via
      ``client.models.deploy``.
    * ``"multi"`` - ``<model_name>-multi`` via ``client.models.deploy_multi``,
      pairing the requested model with ``<model_name>-backup`` at version
      ``"latest"``.
    * ``"ab"`` - ``<model_name>-ab`` via ``client.models.deploy_ab_test``,
      with 90% of traffic on ``version`` and 10% on ``"experimental"``.
    * ``"staged"`` - ``<model_name>-staged`` via
      ``client.models.deploy_staged``, in 10% / 50% / 100% stages of 24 hours
      each.

    Args:
        model_name: Model to deploy; also used to derive the deployment name.
        version: Model version to deploy.
        strategy: One of ``"single"``, ``"multi"``, ``"ab"`` or ``"staged"``.

    Returns:
        The value returned by the selected ``client.models`` deployment call.

    Raises:
        ValueError: If ``strategy`` is not one of the supported values. This
            is raised before any Azure client is created.
        Exception: Any error raised while creating the client or deploying is
            printed and then re-raised unchanged.
    """
    # Reject unknown strategies up front. Otherwise no branch below would
    # bind `deployment`, and the function would fail with an opaque
    # UnboundLocalError only after the Azure client had been created.
    if strategy not in _DEPLOYMENT_STRATEGIES:
        raise ValueError(
            f"Unsupported deployment strategy {strategy!r}; "
            f"expected one of: {', '.join(_DEPLOYMENT_STRATEGIES)}"
        )

    try:
        # Initialize client
        credential = DefaultAzureCredential()
        client = AIProjectClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=credential,
        )

        if strategy == "single":
            # Single model deployment
            deployment = client.models.deploy(
                model_name=model_name,
                version=version,
                deployment_name=f"{model_name}-prod",
            )
        elif strategy == "multi":
            # Multi-model deployment
            deployment = client.models.deploy_multi(
                models=[
                    {"name": model_name, "version": version},
                    {"name": f"{model_name}-backup", "version": "latest"},
                ],
                deployment_name=f"{model_name}-multi",
            )
        elif strategy == "ab":
            # A/B testing deployment
            deployment = client.models.deploy_ab_test(
                models=[
                    {
                        "name": model_name,
                        "version": version,
                        "traffic_percentage": 90,
                    },
                    {
                        "name": model_name,
                        "version": "experimental",
                        "traffic_percentage": 10,
                    },
                ],
                deployment_name=f"{model_name}-ab",
            )
        else:
            # Staged rollout; validation above leaves "staged" as the only
            # remaining possibility.
            deployment = client.models.deploy_staged(
                model_name=model_name,
                version=version,
                deployment_name=f"{model_name}-staged",
                stages=[
                    {"percentage": 10, "duration_hours": 24},
                    {"percentage": 50, "duration_hours": 24},
                    {"percentage": 100, "duration_hours": 24},
                ],
            )

        return deployment
    except Exception as e:
        print(f"Error deploying model: {str(e)}")
        raise
## Production Considerations
### 1. Performance Optimization
def optimize_deployment_performance(deployment_name: str):
    """Re-tune a deployment's compute and performance settings from metrics.

    Four metrics are requested through ``client.deployments.get_metrics``
    (``latency_p95``, ``requests_per_second``, ``token_utilization`` and
    ``compute_utilization``). A new configuration is derived from them:

    * instance type is ``Standard_DS4_v2`` when ``compute_utilization`` is
      above 80, otherwise ``Standard_DS3_v2``;
    * instance count is ``requests_per_second / 50`` truncated to an int,
      with a floor of 1;
    * optimization level is ``memory_optimized`` when ``token_utilization``
      is above 80, otherwise ``balanced``.

    The result is applied with ``client.deployments.update_configuration``.

    Args:
        deployment_name: Name of the deployment to inspect and update.

    Returns:
        ``True`` once ``update_configuration`` has returned.

    Raises:
        Exception: Any error is printed and then re-raised unchanged.
    """
    try:
        # Authenticate and bind the client to the target subscription.
        azure_credential = DefaultAzureCredential()
        project_client = AIProjectClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=azure_credential,
        )

        # Pull the metrics that drive the sizing decisions.
        # NOTE(review): assumes the result is indexable by metric name and
        # that utilization values are percentages - confirm against the SDK.
        wanted_metrics = [
            "latency_p95",
            "requests_per_second",
            "token_utilization",
            "compute_utilization",
        ]
        stats = project_client.deployments.get_metrics(
            deployment_name=deployment_name,
            metric_names=wanted_metrics,
        )

        # Move to a larger VM size under heavy compute load.
        if stats["compute_utilization"] > 80:
            vm_type = "Standard_DS4_v2"
        else:
            vm_type = "Standard_DS3_v2"

        # One instance per 50 requests/second, never fewer than one.
        replica_count = max(1, int(stats["requests_per_second"] / 50))

        # Favour memory when token utilization is high.
        if stats["token_utilization"] > 80:
            tuning_level = "memory_optimized"
        else:
            tuning_level = "balanced"

        new_config = {
            "compute": {
                "instance_type": vm_type,
                "instance_count": replica_count,
            },
            "performance": {
                "cache_enabled": True,
                "batch_size": 32,
                "optimization_level": tuning_level,
            },
        }

        # Push the derived configuration to the deployment.
        project_client.deployments.update_configuration(
            deployment_name=deployment_name,
            configuration=new_config,
        )
    except Exception as exc:
        print(f"Error optimizing deployment performance: {exc!s}")
        raise
    else:
        return True
### 2. Monitoring and Logging
def setup_deployment_monitoring(deployment_name: str):
    """Enable metrics, logging and alert rules for a deployment.

    The configuration turns on five metrics, request/response/error logging
    at ``Information`` level, and two alerts: ``notify`` when ``error_rate``
    crosses 0.01 and ``scale`` when ``latency_p95`` crosses 1000, each over a
    5-minute window. It is applied with
    ``client.deployments.configure_monitoring``.

    Args:
        deployment_name: Name of the deployment to monitor.

    Returns:
        ``True`` once ``configure_monitoring`` has returned.

    Raises:
        Exception: Any error is printed and then re-raised unchanged.
    """
    try:
        # Authenticate and bind the client to the target subscription.
        azure_credential = DefaultAzureCredential()
        project_client = AIProjectClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=azure_credential,
        )

        # Metrics to collect.
        metric_flags = {
            "latency": True,
            "throughput": True,
            "error_rate": True,
            "token_usage": True,
            "compute_usage": True,
        }
        # Logging verbosity and what gets logged.
        log_settings = {
            "level": "Information",
            "request_logging": True,
            "response_logging": True,
            "error_logging": True,
        }
        # Alert rules evaluated over 5-minute windows.
        error_rate_alert = {
            "metric": "error_rate",
            "threshold": 0.01,
            "window_minutes": 5,
            "action": "notify",
        }
        latency_alert = {
            "metric": "latency_p95",
            "threshold": 1000,
            "window_minutes": 5,
            "action": "scale",
        }
        settings = {
            "metrics": metric_flags,
            "logging": log_settings,
            "alerts": [error_rate_alert, latency_alert],
        }

        # Push the monitoring configuration to the deployment.
        project_client.deployments.configure_monitoring(
            deployment_name=deployment_name,
            monitoring_config=settings,
        )
    except Exception as exc:
        print(f"Error setting up monitoring: {exc!s}")
        raise
    else:
        return True
### 3. Security Management
def configure_deployment_security(deployment_name: str):
    """Apply authentication, network, data and compliance settings.

    The configuration has four sections (``authentication``, ``network``,
    ``data`` and ``compliance``) and is applied with
    ``client.deployments.configure_security``. The customer-managed key id is
    read from the ``KEY_VAULT_KEY_ID`` environment variable.

    Args:
        deployment_name: Name of the deployment to secure.

    Returns:
        ``True`` once ``configure_security`` has returned.

    Raises:
        Exception: Any error is printed and then re-raised unchanged.
    """
    try:
        # Authenticate and bind the client to the target subscription.
        azure_credential = DefaultAzureCredential()
        project_client = AIProjectClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=azure_credential,
        )

        # Who may call the deployment.
        auth_section = {
            "type": "aad",
            "required_roles": ["AI.User", "AI.Admin"],
            "token_expiration_hours": 24,
        }
        # Where calls may come from. The subnet id below is a placeholder
        # with elided segments ("...").
        vnet_rule = {
            "subnet_id": "/subscriptions/.../resourceGroups/.../providers/Microsoft.Network/virtualNetworks/vnet/subnets/subnet",
            "ignore_missing_endpoint": False,
        }
        network_section = {
            "private_endpoint_enabled": True,
            "allowed_ip_ranges": ["10.0.0.0/24"],
            "virtual_network_rules": [vnet_rule],
        }
        # Encryption at rest with a customer-managed key.
        data_section = {
            "encryption_type": "CustomerManaged",
            "key_vault_key_id": os.getenv("KEY_VAULT_KEY_ID"),
            "double_encryption_enabled": True,
        }
        # Auditing and retention.
        compliance_section = {
            "audit_logging_enabled": True,
            "diagnostic_settings_enabled": True,
            "retention_days": 90,
        }
        settings = {
            "authentication": auth_section,
            "network": network_section,
            "data": data_section,
            "compliance": compliance_section,
        }

        # Push the security configuration to the deployment.
        project_client.deployments.configure_security(
            deployment_name=deployment_name,
            security_config=settings,
        )
    except Exception as exc:
        print(f"Error configuring security: {exc!s}")
        raise
    else:
        return True
## Scaling Strategies
### 1. Horizontal Scaling
- Instance management
- Load balancing
- Traffic distribution
- Resource allocation
### 2. Vertical Scaling
- Resource adjustment
- Performance tuning
- Capacity planning
- Cost management
### 3. Auto-scaling
- Scaling rules
- Trigger conditions
- Resource limits
- Performance targets
## Best Practices
### 1. Deployment Strategy
- Gradual rollout
- Version control
- Rollback planning
- Documentation
### 2. Performance Management
- Regular monitoring
- Performance tuning
- Resource optimization
- Cost tracking
### 3. Maintenance Planning
- Update strategy
- Backup procedures
- Disaster recovery
- Support processes
## Interactive Workshop
For hands-on practice with model deployment in Azure AI Foundry, try our interactive notebook:
Launch Model Deployment Workshop
This notebook provides:

- Step-by-step deployment configuration
- Model deployment process walkthrough
- Deployment monitoring and management
- Scaling and updating deployments
- Best practices for production deployments
Next: Testing Deployments