Skip to content

Deploying Your Customer Service Model

Let's deploy a GPT model optimized for customer service interactions. This will take about 15 minutes.

Quick Model Deployment

1. Select the Model

from azure.identity import DefaultAzureCredential
from azure.ai.resources import AIProjectClient
import os

def deploy_customer_service_model():
    """Create the ``customer-service-v1`` deployment of ``gpt-35-turbo``.

    An ``AIProjectClient`` is built from the ``AZURE_SUBSCRIPTION_ID`` and
    ``AZURE_RESOURCE_GROUP`` environment variables, then
    ``client.models.deploy`` is called with a fixed model / compute /
    settings configuration.

    Returns:
        Whatever ``client.models.deploy`` returns.

    Raises:
        Exception: Any failure is printed and then re-raised unchanged.
    """
    try:
        # Build the project client from ambient Azure credentials.
        project_client = AIProjectClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=DefaultAzureCredential(),
        )

        # Assemble the configuration one section at a time.
        model_section = {
            "name": "gpt-35-turbo",  # or your preferred model
            "version": "0301",
        }
        compute_section = {
            "instance_type": "Standard_DS3_v2",
            "instance_count": 1,
        }
        generation_settings = {
            "max_tokens": 1000,
            "temperature": 0.7,
            "top_p": 0.95,
        }
        full_config = {
            "model": model_section,
            "compute": compute_section,
            "settings": generation_settings,
        }

        # Submit the deployment request and hand the result back as-is.
        return project_client.models.deploy(
            model_name=model_section["name"],
            deployment_name="customer-service-v1",
            configuration=full_config,
        )
    except Exception as err:
        print(f"Deployment error: {err!s}")
        raise

Deployment Process

1. Environment Setup

from azure.identity import DefaultAzureCredential
from azure.ai.resources import AIProjectClient
import os

def setup_deployment_environment():
    """Apply the compute, network, security and monitoring environment config.

    An ``AIProjectClient`` is built from the ``AZURE_SUBSCRIPTION_ID`` and
    ``AZURE_RESOURCE_GROUP`` environment variables. The Key Vault and Log
    Analytics identifiers are read from ``KEY_VAULT_ID`` and
    ``LOG_ANALYTICS_WORKSPACE_ID``. The assembled dictionary is passed to
    ``client.deployments.configure_environment``.

    Returns:
        ``True`` once the configuration call has returned.

    Raises:
        Exception: Any failure is printed and then re-raised unchanged.
    """
    try:
        # Build the project client from ambient Azure credentials.
        project_client = AIProjectClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=DefaultAzureCredential(),
        )

        # Each section is built separately, then combined below.
        compute_section = {
            "vm_size": "Standard_DS3_v2",
            "min_nodes": 1,
            "max_nodes": 4,
        }
        network_section = {
            "virtual_network": "ai-vnet",
            "subnet": "model-subnet",
            "private_link_enabled": True,
        }
        security_section = {
            "encryption_type": "CustomerManaged",
            "key_vault_id": os.getenv("KEY_VAULT_ID"),
            "network_isolation": True,
        }
        monitoring_section = {
            "workspace_id": os.getenv("LOG_ANALYTICS_WORKSPACE_ID"),
            "metrics_enabled": True,
            "logs_enabled": True,
        }

        # Push the combined environment configuration to the service.
        project_client.deployments.configure_environment(
            environment_config={
                "compute": compute_section,
                "network": network_section,
                "security": security_section,
                "monitoring": monitoring_section,
            }
        )
    except Exception as err:
        print(f"Error setting up deployment environment: {err!s}")
        raise
    else:
        return True

2. Deployment Configuration

def configure_model_deployment(model_name: str, version: str):
    """Create ``<model_name>-deployment`` with fixed compute/endpoint settings.

    Args:
        model_name: Name of the model; also used as the prefix of the
            deployment name.
        version: Model version forwarded to ``create_deployment``.

    Returns:
        Whatever ``client.models.create_deployment`` returns.

    Raises:
        Exception: Any failure is printed and then re-raised unchanged.
    """
    try:
        # Build the project client from ambient Azure credentials.
        project_client = AIProjectClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=DefaultAzureCredential(),
        )

        # Compute sizing, including the autoscale bounds.
        compute_section = {
            "instance_type": "Standard_DS3_v2",
            "instance_count": 2,
            "autoscale_enabled": True,
            "min_replicas": 1,
            "max_replicas": 5,
        }
        # Endpoint limits.
        endpoint_section = {
            "throughput_limit": 100,
            "max_concurrent_requests": 20,
            "request_timeout": 30,
        }
        # Generation parameters.
        performance_section = {
            "max_tokens": 2000,
            "temperature": 0.7,
            "top_p": 0.95,
            "frequency_penalty": 0.0,
            "presence_penalty": 0.0,
        }

        target_name = f"{model_name}-deployment"
        return project_client.models.create_deployment(
            model_name=model_name,
            version=version,
            deployment_name=target_name,
            configuration={
                "compute": compute_section,
                "endpoint": endpoint_section,
                "performance": performance_section,
            },
        )
    except Exception as err:
        print(f"Error configuring model deployment: {err!s}")
        raise

3. Deployment Methods

def deploy_model_with_strategy(model_name: str, version: str, strategy: str):
    """Deploy a model using one of the supported rollout strategies.

    Args:
        model_name: Name of the model; also used as the prefix of the
            deployment name.
        version: Model version to deploy.
        strategy: One of ``"single"``, ``"multi"``, ``"ab"`` or ``"staged"``
            (case-sensitive).

    Returns:
        Whatever the strategy-specific ``client.models.deploy*`` call returns.

    Raises:
        ValueError: If ``strategy`` is not one of the supported values. This
            is checked before any Azure client is created.
        Exception: Any failure while creating the client or deploying is
            printed and then re-raised unchanged.
    """
    supported_strategies = ("single", "multi", "ab", "staged")
    # Fail fast: without this guard an unknown strategy skipped every branch
    # below and surfaced as an UnboundLocalError on ``return deployment``,
    # after a client had already been constructed.
    if strategy not in supported_strategies:
        raise ValueError(
            f"Unsupported deployment strategy {strategy!r}; "
            f"expected one of {', '.join(supported_strategies)}"
        )

    try:
        # Initialize client
        credential = DefaultAzureCredential()
        client = AIProjectClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=credential
        )

        if strategy == "single":
            # Single model deployment
            deployment = client.models.deploy(
                model_name=model_name,
                version=version,
                deployment_name=f"{model_name}-prod"
            )

        elif strategy == "multi":
            # Multi-model deployment: requested model plus a "-backup" model
            deployment = client.models.deploy_multi(
                models=[
                    {"name": model_name, "version": version},
                    {"name": f"{model_name}-backup", "version": "latest"}
                ],
                deployment_name=f"{model_name}-multi"
            )

        elif strategy == "ab":
            # A/B testing deployment: 90% requested version, 10% experimental
            deployment = client.models.deploy_ab_test(
                models=[
                    {
                        "name": model_name,
                        "version": version,
                        "traffic_percentage": 90
                    },
                    {
                        "name": model_name,
                        "version": "experimental",
                        "traffic_percentage": 10
                    }
                ],
                deployment_name=f"{model_name}-ab"
            )

        else:
            # strategy == "staged" (guaranteed by the guard above).
            # Staged rollout: 10% -> 50% -> 100%, 24 hours per stage
            deployment = client.models.deploy_staged(
                model_name=model_name,
                version=version,
                deployment_name=f"{model_name}-staged",
                stages=[
                    {"percentage": 10, "duration_hours": 24},
                    {"percentage": 50, "duration_hours": 24},
                    {"percentage": 100, "duration_hours": 24}
                ]
            )

        return deployment
    except Exception as e:
        print(f"Error deploying model: {str(e)}")
        raise

Production Considerations

1. Performance Optimization

def optimize_deployment_performance(deployment_name: str):
    """Re-tune a deployment's compute and performance settings from metrics.

    Fetches ``latency_p95``, ``requests_per_second``, ``token_utilization``
    and ``compute_utilization`` through ``client.deployments.get_metrics``,
    derives a new configuration from them, and submits it with
    ``client.deployments.update_configuration``.

    Args:
        deployment_name: Name of the deployment to inspect and update.

    Returns:
        ``True`` once the update call has returned.

    Raises:
        Exception: Any failure is printed and then re-raised unchanged.
    """
    try:
        # Build the project client from ambient Azure credentials.
        project_client = AIProjectClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=DefaultAzureCredential(),
        )

        wanted_metrics = [
            "latency_p95",
            "requests_per_second",
            "token_utilization",
            "compute_utilization",
        ]
        current = project_client.deployments.get_metrics(
            deployment_name=deployment_name,
            metric_names=wanted_metrics,
        )

        # Use the larger VM size when compute utilization is above 80.
        compute_is_hot = current["compute_utilization"] > 80
        vm_size = "Standard_DS4_v2" if compute_is_hot else "Standard_DS3_v2"

        # One instance per 50 requests/second (truncated), never fewer than one.
        instances = max(1, int(current["requests_per_second"] / 50))

        # Switch optimization level when token utilization is above 80.
        tokens_are_hot = current["token_utilization"] > 80
        level = "memory_optimized" if tokens_are_hot else "balanced"

        tuned_config = {
            "compute": {
                "instance_type": vm_size,
                "instance_count": instances,
            },
            "performance": {
                "cache_enabled": True,
                "batch_size": 32,
                "optimization_level": level,
            },
        }

        project_client.deployments.update_configuration(
            deployment_name=deployment_name,
            configuration=tuned_config,
        )
    except Exception as err:
        print(f"Error optimizing deployment performance: {err!s}")
        raise
    else:
        return True

2. Monitoring and Logging

def setup_deployment_monitoring(deployment_name: str):
    """Send a fixed metrics / logging / alerts configuration for a deployment.

    The configuration is passed to ``client.deployments.configure_monitoring``.

    Args:
        deployment_name: Name of the deployment to configure.

    Returns:
        ``True`` once the configuration call has returned.

    Raises:
        Exception: Any failure is printed and then re-raised unchanged.
    """
    try:
        # Build the project client from ambient Azure credentials.
        project_client = AIProjectClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=DefaultAzureCredential(),
        )

        # Metric flags, all switched on.
        metric_flags = {
            "latency": True,
            "throughput": True,
            "error_rate": True,
            "token_usage": True,
            "compute_usage": True,
        }
        # Logging level and flags.
        logging_section = {
            "level": "Information",
            "request_logging": True,
            "response_logging": True,
            "error_logging": True,
        }
        # Two alert rules, each over a 5-minute window.
        error_rate_alert = {
            "metric": "error_rate",
            "threshold": 0.01,
            "window_minutes": 5,
            "action": "notify",
        }
        latency_alert = {
            "metric": "latency_p95",
            "threshold": 1000,
            "window_minutes": 5,
            "action": "scale",
        }

        project_client.deployments.configure_monitoring(
            deployment_name=deployment_name,
            monitoring_config={
                "metrics": metric_flags,
                "logging": logging_section,
                "alerts": [error_rate_alert, latency_alert],
            },
        )
    except Exception as err:
        print(f"Error setting up monitoring: {err!s}")
        raise
    else:
        return True

3. Security Management

def configure_deployment_security(deployment_name: str):
    """Send a fixed authentication / network / data / compliance configuration.

    The customer-managed key identifier is read from the ``KEY_VAULT_KEY_ID``
    environment variable. The assembled dictionary is passed to
    ``client.deployments.configure_security``.

    Args:
        deployment_name: Name of the deployment to configure.

    Returns:
        ``True`` once the configuration call has returned.

    Raises:
        Exception: Any failure is printed and then re-raised unchanged.
    """
    try:
        # Build the project client from ambient Azure credentials.
        project_client = AIProjectClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=DefaultAzureCredential(),
        )

        auth_section = {
            "type": "aad",
            "required_roles": ["AI.User", "AI.Admin"],
            "token_expiration_hours": 24,
        }

        # The subnet ID below is a placeholder path with elided segments.
        vnet_rule = {
            "subnet_id": "/subscriptions/.../resourceGroups/.../providers/Microsoft.Network/virtualNetworks/vnet/subnets/subnet",
            "ignore_missing_endpoint": False,
        }
        network_section = {
            "private_endpoint_enabled": True,
            "allowed_ip_ranges": ["10.0.0.0/24"],
            "virtual_network_rules": [vnet_rule],
        }

        data_section = {
            "encryption_type": "CustomerManaged",
            "key_vault_key_id": os.getenv("KEY_VAULT_KEY_ID"),
            "double_encryption_enabled": True,
        }

        compliance_section = {
            "audit_logging_enabled": True,
            "diagnostic_settings_enabled": True,
            "retention_days": 90,
        }

        project_client.deployments.configure_security(
            deployment_name=deployment_name,
            security_config={
                "authentication": auth_section,
                "network": network_section,
                "data": data_section,
                "compliance": compliance_section,
            },
        )
    except Exception as err:
        print(f"Error configuring security: {err!s}")
        raise
    else:
        return True

Scaling Strategies

1. Horizontal Scaling

  • Instance management
  • Load balancing
  • Traffic distribution
  • Resource allocation

2. Vertical Scaling

  • Resource adjustment
  • Performance tuning
  • Capacity planning
  • Cost management

3. Auto-scaling

  • Scaling rules
  • Trigger conditions
  • Resource limits
  • Performance targets

Best Practices

1. Deployment Strategy

  • Gradual rollout
  • Version control
  • Rollback planning
  • Documentation

2. Performance Management

  • Regular monitoring
  • Performance tuning
  • Resource optimization
  • Cost tracking

3. Maintenance Planning

  • Update strategy
  • Backup procedures
  • Disaster recovery
  • Support processes

Interactive Workshop

For hands-on practice with model deployment in Azure AI Foundry, try our interactive notebook:

Launch Model Deployment Workshop

This notebook provides:

  • Step-by-step deployment configuration
  • Model deployment process walkthrough
  • Deployment monitoring and management
  • Scaling and updating deployments
  • Best practices for production deployments

Next: Testing Deployments