# Evaluating Your Customer Service Agent

Let's set up basic evaluation metrics to monitor your agent's performance. This will take about 15 minutes.

## Quick Setup

```python
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation import EvaluationClient
import os

class AgentEvaluator:
    def __init__(self):
        """Initialize the evaluation setup."""
        try:
            credential = DefaultAzureCredential()
            self.client = EvaluationClient(
                subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
                resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
                credential=credential
            )
        except Exception as e:
            print(f"Setup error: {str(e)}")
            raise

    def configure_metrics(self):
        """Set up key customer service metrics."""
        try:
            metrics_config = {
                "response_quality": {
                    "relevance_score": True,
                    "completeness_score": True,
                    "accuracy_score": True
                },
                "performance": {
                    "response_time": True,
                    "success_rate": True
                },
                "customer_satisfaction": {
                    "resolution_rate": True,
                    "clarity_score": True
                }
            }

            # Apply configuration
            self.client.configure_metrics(metrics_config)
            return True
        except Exception as e:
            print(f"Metrics configuration error: {str(e)}")
            return False

    def evaluate_response(self, user_input: str, agent_response: str):
        """Evaluate a single agent response."""
        try:
            evaluation = self.client.evaluate_response(
                input_text=user_input,
                response_text=agent_response,
                metrics=["relevance", "completeness", "accuracy"]
            )
            return evaluation
        except Exception as e:
            print(f"Evaluation error: {str(e)}")
            raise

# Usage example
def main():
    evaluator = AgentEvaluator()
    evaluator.configure_metrics()

    # Example evaluation
    result = evaluator.evaluate_response(
        "How do I reset my password?",
        "To reset your password, click the 'Forgot Password' link..."
    )
    print("Evaluation results:", result)

if __name__ == "__main__":
    main()
```

## Environment Setup

### 1. Initial Setup

```python
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation import EvaluationClient
import json
import os
from typing import Dict, Any

class EvaluationSetup:
    def __init__(self):
        """Initialize the evaluation setup with Azure credentials."""
        try:
            self.credential = DefaultAzureCredential()
            self.client = EvaluationClient(
                subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
                resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
                credential=self.credential
            )
        except Exception as e:
            print(f"Error initializing evaluation setup: {str(e)}")
            raise

    def configure_evaluation(self) -> Dict[str, Any]:
        """Configure evaluation settings with recommended defaults."""
        try:
            config = {
                "metrics": {
                    "response_quality": {
                        "weight": 0.3,
                        "threshold": 0.8
                    },
                    "response_relevance": {
                        "weight": 0.3,
                        "threshold": 0.7
                    },
                    "task_completion": {
                        "weight": 0.3,
                        "threshold": 0.9
                    },
                    "error_rate": {
                        "weight": 0.1,
                        "threshold": 0.1
                    }
                },
                "sampling": {
                    "method": "random",
                    "size": 1000,
                    "seed": 42
                },
                "evaluation_settings": {
                    "concurrent_evaluations": 5,
                    "timeout_seconds": 300,
                    "retry_attempts": 3
                }
            }

            # Save configuration
            with open("evaluation_config.json", "w") as f:
                json.dump(config, f, indent=2)

            return config
        except Exception as e:
            print(f"Error configuring evaluation: {str(e)}")
            raise

    def setup_monitoring(self):
        """Configure monitoring and alerting for evaluation."""
        try:
            monitoring_config = {
                "metrics_collection": {
                    "interval": "1m",
                    "retention_days": 30
                },
                "alerts": {
                    "error_rate_threshold": 0.2,
                    "latency_threshold_ms": 1000,
                    "notification_channels": ["email"]
                }
            }

            # Apply monitoring configuration
            self.client.configure_monitoring(monitoring_config)

            return True
        except Exception as e:
            print(f"Error setting up monitoring: {str(e)}")
            return False

# Example usage
if __name__ == "__main__":
    try:
        # Initialize setup
        setup = EvaluationSetup()

        # Configure evaluation
        config = setup.configure_evaluation()
        print("Evaluation configuration completed:")
        print(json.dumps(config, indent=2))

        # Setup monitoring
        monitoring_success = setup.setup_monitoring()
        if monitoring_success:
            print("Monitoring setup completed successfully")

    except Exception as e:
        print(f"Setup failed: {str(e)}")
```
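The `configure_evaluation` sketch above assigns each metric a weight and a pass threshold but leaves the actual scoring to the service. As a minimal client-side illustration of how such weights and thresholds might be combined (pure Python; the metric names mirror the config above, and treating `error_rate` as lower-is-better is an assumption):

```python
from typing import Dict

# Assumption: error_rate is "lower is better"; the other metrics are
# "higher is better". Metric names mirror configure_evaluation above.
LOWER_IS_BETTER = {"error_rate"}

def weighted_score(scores: Dict[str, float], config: Dict[str, dict]) -> float:
    """Combine per-metric scores in [0, 1] into one weighted score."""
    total_weight = sum(cfg["weight"] for cfg in config.values())
    combined = 0.0
    for name, cfg in config.items():
        value = scores[name]
        if name in LOWER_IS_BETTER:
            value = 1.0 - value  # invert so that higher is always better
        combined += value * cfg["weight"]
    return combined / total_weight

def passes_thresholds(scores: Dict[str, float], config: Dict[str, dict]) -> bool:
    """True only if every metric meets its configured threshold."""
    return all(
        scores[n] <= c["threshold"] if n in LOWER_IS_BETTER else scores[n] >= c["threshold"]
        for n, c in config.items()
    )

config = {
    "response_quality":   {"weight": 0.3, "threshold": 0.8},
    "response_relevance": {"weight": 0.3, "threshold": 0.7},
    "task_completion":    {"weight": 0.3, "threshold": 0.9},
    "error_rate":         {"weight": 0.1, "threshold": 0.1},
}
scores = {
    "response_quality": 0.85,
    "response_relevance": 0.90,
    "task_completion": 0.95,
    "error_rate": 0.05,
}
print(f"weighted score: {weighted_score(scores, config):.2f}")    # ~0.90
print(f"passes thresholds: {passes_thresholds(scores, config)}")  # True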
### 2. Resource Configuration
- Compute resources
- Storage setup
- Network access
- Security settings
- Monitoring tools
- Logging system
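The exact resource schema depends on your workspace, but as a hedged sketch, the checklist above might map onto a single configuration dictionary in the same style as the other configs on this page (every field name below, and the commented-out `configure_resources` call, are assumptions, not a documented schema):

```python
import os

# Hypothetical resource configuration; field names are illustrative.
resource_config = {
    "compute": {"sku": "Standard_DS3_v2", "instance_count": 2},
    "storage": {
        "account_id": os.getenv("STORAGE_ACCOUNT_ID"),  # assumed env var
        "container": "evaluation-data",
    },
    "network": {"private_endpoint_enabled": True},
    "security": {"managed_identity_enabled": True},
    "monitoring": {"enabled": True, "retention_days": 30},
    "logging": {"level": "INFO", "destination": "log_analytics"},
}

# Assumed to be applied through the same client used elsewhere on this page:
# client.configure_resources(resource_config)
```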

### 3. Development Environment
- IDE setup
- SDK integration
- Testing tools
- Debugging utilities
- Documentation access
- Version control
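Before running any snippet on this page, it's worth confirming that the environment variables the examples rely on are actually set. A minimal, stdlib-only check:

```python
import os

# Environment variables assumed by the examples on this page.
REQUIRED_ENV_VARS = [
    "AZURE_SUBSCRIPTION_ID",
    "AZURE_RESOURCE_GROUP",
    "AZURE_TENANT_ID",
]

missing = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
if missing:
    raise EnvironmentError(f"Missing environment variables: {', '.join(missing)}")
print("All required environment variables are set.")
```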

## Evaluation Configuration

### 1. Project Setup
- Project creation
- Resource assignment
- Team access
- Security configuration
- Monitoring setup
- Backup planning

### 2. Metrics Configuration
- Performance metrics
- Quality metrics
- Resource metrics
- Cost metrics
- Security metrics
- Custom metrics
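Most of the metrics above are built in, but custom metrics follow the same dict-based style used in the monitoring example later on this page (`name`, `aggregation`, `unit`). A hedged sketch of one possible definition, with the `weight` and `threshold` fields assumed by analogy with `configure_evaluation`:

```python
# Hypothetical custom metric definition; the weight/threshold fields are
# assumptions modeled on the configure_evaluation example above.
custom_metric = {
    "name": "first_contact_resolution",
    "description": "Share of conversations resolved without escalation",
    "aggregation": "average",  # matches the custom_metrics style used later
    "unit": "percent",
    "weight": 0.2,
    "threshold": 0.75,
}
```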

### 3. Test Data Management
- Data collection
- Data preparation
- Data validation
- Storage setup
- Access control
- Version management
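Test sets for agent evaluation are commonly stored as JSON Lines, one test case per line. A small, stdlib-only loader that validates each case before use (the `user_input`/`expected_response` schema is an assumption; adjust it to your own data):

```python
import json
from pathlib import Path

# Assumed per-case schema; adjust to your own test set.
REQUIRED_FIELDS = {"user_input", "expected_response"}

def load_test_cases(path: str) -> list:
    """Load a JSON Lines test set, validating required fields per case."""
    cases = []
    for line_no, line in enumerate(Path(path).read_text().splitlines(), start=1):
        if not line.strip():
            continue  # skip blank lines
        case = json.loads(line)
        missing = REQUIRED_FIELDS - case.keys()
        if missing:
            raise ValueError(f"Line {line_no} is missing fields: {sorted(missing)}")
        cases.append(case)
    return cases

# Example: cases = load_test_cases("customer_service_tests.jsonl")
```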

## Tool Integration

The snippets in this section reuse the imports from the Quick Setup example (`os`, `DefaultAzureCredential`, and `EvaluationClient`).

### 1. SDK Integration
```python
def setup_sdk_integration():
    """Set up and configure SDK integration."""
    try:
        # Initialize client
        credential = DefaultAzureCredential()
        client = EvaluationClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=credential
        )

        # Configure SDK settings
        sdk_config = {
            "logging": {
                "level": "INFO",
                "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
                "handlers": ["file", "console"]
            },
            "error_handling": {
                "retry_enabled": True,
                "max_retries": 3,
                "backoff_factor": 2
            },
            "telemetry": {
                "enabled": True,
                "sampling_percentage": 100
            }
        }

        # Apply SDK configuration
        client.configure_sdk(sdk_config)

        # Set up logging
        import logging
        logging.basicConfig(
            level=logging.INFO,
            format=sdk_config["logging"]["format"],
            handlers=[
                logging.FileHandler("evaluation.log"),
                logging.StreamHandler()
            ]
        )

        return True
    except Exception as e:
        print(f"Error setting up SDK integration: {str(e)}")
        raise
```

### 2. Pipeline Configuration

```python
def configure_evaluation_pipeline():
    """Configure evaluation pipeline and workflow."""
    try:
        # Initialize client
        credential = DefaultAzureCredential()
        client = EvaluationClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=credential
        )

        # Define pipeline configuration
        pipeline_config = {
            "stages": [
                {
                    "name": "data_preparation",
                    "timeout_minutes": 30,
                    "retry_count": 2
                },
                {
                    "name": "model_evaluation",
                    "timeout_minutes": 60,
                    "retry_count": 1
                },
                {
                    "name": "results_analysis",
                    "timeout_minutes": 30,
                    "retry_count": 2
                }
            ],
            "triggers": [
                {
                    "type": "schedule",
                    "cron": "0 0 * * *"  # Daily at midnight
                },
                {
                    "type": "event",
                    "event_type": "ModelDeployment"
                }
            ],
            "error_handling": {
                "continue_on_error": False,
                "notification_channels": ["email"]
            },
            "reporting": {
                "format": "pdf",
                "schedule": "weekly",
                "recipients": ["team@domain.com"]
            }
        }

        # Apply pipeline configuration
        client.configure_pipeline(pipeline_config)

        return True
    except Exception as e:
        print(f"Error configuring pipeline: {str(e)}")
        raise
```

### 3. Monitoring Setup

```python
def setup_evaluation_monitoring():
    """Configure comprehensive monitoring for evaluation system."""
    try:
        # Initialize client
        credential = DefaultAzureCredential()
        client = EvaluationClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=credential
        )

        # Configure monitoring settings
        monitoring_config = {
            "metrics": {
                "collection_interval": "1m",
                "retention_days": 30,
                "custom_metrics": [
                    {
                        "name": "evaluation_success_rate",
                        "aggregation": "average",
                        "unit": "percent"
                    }
                ]
            },
            "alerts": [
                {
                    "name": "high_failure_rate",
                    "metric": "evaluation_failure_rate",
                    "threshold": 0.1,
                    "window_minutes": 15,
                    "action": "notify"
                }
            ],
            "dashboards": [
                {
                    "name": "Evaluation Overview",
                    "refresh_interval": "5m",
                    "widgets": [
                        "success_rate",
                        "latency",
                        "resource_usage"
                    ]
                }
            ],
            "logs": {
                "workspace_id": os.getenv("LOG_ANALYTICS_WORKSPACE_ID"),
                "retention_days": 30,
                "diagnostic_settings": {
                    "enabled": True,
                    "categories": ["Evaluation", "Pipeline", "Security"]
                }
            },
            "reporting": {
                "scheduled_reports": [
                    {
                        "name": "Weekly Performance",
                        "schedule": "0 0 * * 0",  # Weekly on Sunday
                        "format": "pdf",
                        "recipients": ["team@domain.com"]
                    }
                ],
                "export_format": "csv",
                "auto_export": True
            }
        }

        # Apply monitoring configuration
        client.configure_monitoring(monitoring_config)

        return True
    except Exception as e:
        print(f"Error setting up monitoring: {str(e)}")
        raise
```

## Security Configuration

### 1. Access Control

```python
def configure_access_control():
    """Configure access control and security settings."""
    try:
        # Initialize client
        credential = DefaultAzureCredential()
        client = EvaluationClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=credential
        )

        # Configure authentication settings
        auth_config = {
            "authentication": {
                "type": "aad",
                "tenant_id": os.getenv("AZURE_TENANT_ID"),
                "required_roles": ["AI.Evaluator", "AI.Admin"]
            },
            "authorization": {
                "role_assignments": [
                    {
                        "role": "AI.Evaluator",
                        "scope": "project",
                        "assignees": ["user@domain.com"]
                    }
                ],
                "custom_roles": [
                    {
                        "name": "EvaluationViewer",
                        "permissions": ["read", "execute"]
                    }
                ]
            },
            "audit": {
                "logging_enabled": True,
                "log_retention_days": 90,
                "detailed_logging": True
            }
        }

        # Apply configuration
        client.security.configure_access_control(auth_config)

        return True
    except Exception as e:
        print(f"Error configuring access control: {str(e)}")
        raise
```

### 2. Data Protection

```python
def setup_data_protection():
    """Configure data protection and compliance settings."""
    try:
        # Initialize client
        credential = DefaultAzureCredential()
        client = EvaluationClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=credential
        )

        # Configure data protection
        protection_config = {
            "encryption": {
                "type": "CustomerManaged",
                "key_vault_id": os.getenv("KEY_VAULT_ID"),
                "key_name": "evaluation-key",
                "auto_rotation_enabled": True
            },
            "access_policies": {
                "data_access": "RoleBasedAccessControl",
                "minimum_tls_version": "1.2",
                "allowed_ip_ranges": ["10.0.0.0/24"]
            },
            "backup": {
                "enabled": True,
                "frequency": "Daily",
                "retention_days": 30,
                "geo_redundant": True
            },
            "compliance": {
                "data_classification": "Confidential",
                "retention_policy": "1Year",
                "audit_enabled": True
            }
        }

        # Apply configuration
        client.security.configure_data_protection(protection_config)

        return True
    except Exception as e:
        print(f"Error setting up data protection: {str(e)}")
        raise
```

### 3. Network Security

```python
def configure_network_security():
    """Configure network security and monitoring."""
    try:
        # Initialize client
        credential = DefaultAzureCredential()
        client = EvaluationClient(
            subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
            resource_group=os.getenv("AZURE_RESOURCE_GROUP"),
            credential=credential
        )

        # Configure network security
        network_config = {
            "network": {
                "virtual_network_id": os.getenv("VNET_ID"),
                "subnet_id": os.getenv("SUBNET_ID"),
                "private_endpoint_enabled": True
            },
            "firewall": {
                "rules": [
                    {
                        "name": "allow-internal",
                        "priority": 100,
                        "action": "Allow",
                        "source_addresses": ["10.0.0.0/16"]
                    }
                ],
                "ddos_protection_enabled": True
            },
            "monitoring": {
                "traffic_analytics_enabled": True,
                "flow_logs_enabled": True,
                "retention_days": 30,
                "alerts": [
                    {
                        "name": "high-traffic",
                        "metric": "BytesTransferred",
                        "threshold": 1000000,
                        "window_minutes": 5
                    }
                ]
            },
            "threat_protection": {
                "enabled": True,
                "scan_level": "High",
                "email_notifications": True,
                "incident_response": {
                    "auto_remediation": True,
                    "notification_channels": ["email", "webhook"]
                }
            }
        }

        # Apply configuration
        client.security.configure_network_security(network_config)

        return True
    except Exception as e:
        print(f"Error configuring network security: {str(e)}")
        raise
```

## Best Practices

### 1. Environment Management

- Resource optimization
- Cost monitoring
- Performance tuning
- Security hardening
- Documentation
- Team training

### 2. Data Management

- Data organization
- Version control
- Access management
- Backup strategy
- Recovery procedures
- Compliance monitoring

### 3. Operational Excellence

- Process automation
- Quality control
- Performance monitoring
- Security maintenance
- Documentation updates
- Knowledge sharing

## Interactive Workshop

To get hands-on experience with performance metrics and evaluation setup, we've prepared an interactive Jupyter notebook that will guide you through:

- Defining custom evaluation metrics
- Setting up comprehensive test cases
- Running performance evaluations
- Analyzing and visualizing results
- Implementing optimization recommendations

Launch Performance Metrics Workshop

This notebook provides a practical implementation of performance metrics evaluation. You'll work directly with the Azure AI Evaluation SDK to measure and optimize your customer service agent's performance.

Next: Evaluating Agents