diff --git a/01-tutorials/07-AgentCore-E2E/lab-07-agent-evaluation.ipynb b/01-tutorials/07-AgentCore-E2E/lab-07-agent-evaluation.ipynb new file mode 100644 index 00000000..8458afcf --- /dev/null +++ b/01-tutorials/07-AgentCore-E2E/lab-07-agent-evaluation.ipynb @@ -0,0 +1,811 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Customer Support Agent Evaluation Framework\n", + "\n", + "## Overview\n", + "\n", + "This notebook implements the [Strands Agents evaluation strategy](https://strandsagents.com/0.1.x/documentation/docs/user-guide/observability-evaluation/evaluation/?h=evaluation) with LLM Judge Evaluation to comprehensively assess your customer support agent running on Amazon Bedrock AgentCore Runtime.\n", + "\n", + "### Evaluation Strategy\n", + "\n", + "- **Multi-dimensional Quality Assessment**: Helpfulness, accuracy, clarity, professionalism, completeness\n", + "- **Tool Usage Analysis**: Appropriate tool selection and usage patterns\n", + "- **Performance Metrics**: Response times and success rates\n", + "- **LLM-as-Judge**: Claude 4 Sonnet for objective evaluation with Tool Usage tracked using x-ray and observability\n", + "- **Summarize test results**\n", + "- **Save outputs to a file**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup and Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install boto3 requests strands-agents bedrock-agentcore" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import asyncio\n", + "import json\n", + "import time\n", + "import uuid\n", + "import boto3\n", + "import requests\n", + "from dataclasses import dataclass, asdict\n", + "from typing import Dict, List, Any, Optional\n", + "from datetime import datetime\n", + "\n", + "\n", + "# AWS clients\n", + "bedrock = boto3.client('bedrock-runtime')\n", + "ssm = boto3.client('ssm')\n", + "\n", + "print(\"✅ Dependencies loaded successfully\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configuration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Configuration\n", + "AGENT_NAME = \"customer_support_agent\" \n", + "EVALUATOR_MODEL = \"us.anthropic.claude-sonnet-4-20250514-v1:0\"\n", + "\n", + "# Helper function to get SSM parameters\n", + "def get_ssm_parameter(name: str) -> str:\n", + " try:\n", + " response = ssm.get_parameter(Name=name, WithDecryption=True)\n", + " return response['Parameter']['Value']\n", + " except Exception as e:\n", + " raise Exception(f\"Failed to get SSM parameter {name}: {e}\")\n", + "\n", + "# Get agent endpoint\n", + "try:\n", + " AGENT_ENDPOINT = get_ssm_parameter(f\"/app/customersupport/agentcore/runtime_arn\") # This is the default value set in lab-04-agentcore-runtime.ipynb \n", + " # print(f\"✅ Agent endpoint: {AGENT_ENDPOINT}\")\n", + "except Exception as e:\n", + " print(f\"⚠️ Could not get agent endpoint: {e}\")\n", + " AGENT_ENDPOINT = \"http://localhost:8080\" # Fallback for local testing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Classes and Test Cases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@dataclass\n", + "class TestCase:\n", + " id: str\n", + " query: str\n", + " category: str\n", + " expected_tools: List[str]\n", + " 
expected_criteria: Dict[str, Any]\n", + " description: str\n", + "\n", + "@dataclass\n", + "class EvaluationResult:\n", + " test_case_id: str\n", + " query: str\n", + " response: str\n", + " metrics: Dict[str, float]\n", + " response_time: float\n", + " success: bool\n", + " error_message: Optional[str] = None\n", + " tool_calls: List[str] = None\n", + " \n", + " def to_dict(self):\n", + " return asdict(self)\n", + "\n", + "# Test cases\n", + "TEST_CASES = [\n", + " TestCase(\n", + " id=\"basic_greeting\",\n", + " query=\"Hi, I need help with my account\",\n", + " category=\"basic_inquiry\",\n", + " expected_tools=[],\n", + " expected_criteria={\"should_be_polite\": True, \"should_ask_for_details\": True},\n", + " description=\"Basic greeting and help request\"\n", + " ),\n", + " TestCase(\n", + " id=\"return_policy_check\",\n", + " query=\"What is your return policy for electronics?\",\n", + " category=\"policy_inquiry\",\n", + " expected_tools=[\"get_return_policy\"],\n", + " expected_criteria={\"should_provide_policy\": True, \"should_be_clear\": True},\n", + " description=\"Return policy information request\"\n", + " ),\n", + " TestCase(\n", + " id=\"product_info_request\",\n", + " query=\"Can you tell me about the Gaming Console Pro specifications?\",\n", + " category=\"product_inquiry\",\n", + " expected_tools=[\"get_product_info\"],\n", + " expected_criteria={\"should_provide_specs\": True, \"should_be_detailed\": True},\n", + " description=\"Product information request\"\n", + " ),\n", + " TestCase(\n", + " id=\"troubleshooting_general\",\n", + " query=\"I have overheating issues with my device, help me debug\",\n", + " category=\"technical_support\",\n", + " expected_tools=[],\n", + " expected_criteria={\"should_ask_device_details\": True, \"should_provide_steps\": True},\n", + " description=\"Technical troubleshooting without specialized tools\"\n", + " )\n", + "]\n", + "\n", + "print(f\"✅ Loaded {len(TEST_CASES)} test cases\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Agent Invocation and Evaluation Functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize AgentCore Runtime client\n", + "from bedrock_agentcore_starter_toolkit import Runtime\n", + "from lab_helpers.utils import reauthenticate_user, get_existing_cognito_config\n", + "import os\n", + "import time\n", + "import uuid \n", + "from typing import Dict, List, Any, Optional\n", + "import boto3\n", + "# Enable verbose logging for requests\n", + "import logging\n", + "## Set the below config parameters if you want to see detailed logs from the agent\n", + "#logging.basicConfig(level=logging.DEBUG)\n", + "#logging.getLogger(\"urllib3.connectionpool\").setLevel(logging.DEBUG)\n", + "import urllib.parse\n", + "\n", + "session_id = uuid.uuid4()\n", + "agentcore_client = boto3.client(\n", + " 'bedrock-agentcore',\n", + " )\n", + "REGION_NAME=\"us-east-1\"\n", + "cognito_config = get_existing_cognito_config()\n", + "\n", + "async def invoke_agent(query: str, actor_id: str = \"testuser\") -> Dict[str, Any]:\n", + " \"\"\"Invoke the agent using AgentCore Runtime SDK with JWT token\"\"\"\n", + " start_time = time.time()\n", + " \n", + " # Get bearer token\n", + " bearer_token = reauthenticate_user(\n", + " cognito_config.get(\"client_id\"), \n", + " cognito_config.get(\"client_secret\")\n", + " )\n", + " \n", + " #print(f\"Using Agent ARN from environment: {AGENT_ENDPOINT}\")\n", + "\n", + " # URL encode the agent 
ARN\n", + " escaped_agent_arn = urllib.parse.quote(AGENT_ENDPOINT, safe='')\n", + "\n", + " # Construct the URL\n", + " url = f\"https://bedrock-agentcore.{REGION_NAME}.amazonaws.com/runtimes/{escaped_agent_arn}/invocations?qualifier=DEFAULT\"\n", + "\n", + " # Set up headers\n", + " headers = {\n", + " \"Authorization\": f\"Bearer {bearer_token}\", #f\"Bearer {auth_token}\",\n", + " \"X-Amzn-Trace-Id\": \"1234\", \n", + " \"Content-Type\": \"application/json\",\n", + " \"X-Amzn-Bedrock-AgentCore-Runtime-Session-Id\": f\"eval-session-{uuid.uuid4()}\"\n", + " }\n", + " invoke_response = requests.post(\n", + " url,\n", + " headers=headers,\n", + " data=json.dumps({\"prompt\": query})\n", + " )\n", + "\n", + " # Print response in a safe manner\n", + " print(f\"Status Code: {invoke_response.status_code}\")\n", + " print(f\"Response Headers: {dict(invoke_response.headers)}\")\n", + "\n", + " if invoke_response.status_code == 200:\n", + " try:\n", + " response_data = invoke_response.json()\n", + " print(\"Response JSON:\", response_data)\n", + " \n", + " # Handle both string and dict responses\n", + " if isinstance(response_data, str):\n", + " response_text = response_data\n", + " elif isinstance(response_data, dict):\n", + " response_text = response_data.get(\"result\", str(response_data))\n", + " else:\n", + " response_text = str(response_data)\n", + " \n", + " return {\n", + " \"response\": response_text,\n", + " \"success\": True,\n", + " \"tool_calls\": extract_tool_calls(invoke_response, response_text), # Pass response object\n", + " \"response_time\": time.time() - start_time\n", + " }\n", + " except json.JSONDecodeError:\n", + " return {\n", + " \"response\": invoke_response.text,\n", + " \"success\": True,\n", + " \"tool_calls\": [],\n", + " \"response_time\": time.time() - start_time\n", + " }\n", + " else:\n", + " error_msg = f\"Error ({invoke_response.status_code}): {invoke_response.text[:500]}\"\n", + " print(error_msg)\n", + " if invoke_response.status_code >= 400:\n", + " print(\"Please ensure lab-04-agentcore-runtime.ipynb has been executed and agent is deployed\")\n", + " return {\n", + " \"response\": error_msg,\n", + " \"success\": False,\n", + " \"tool_calls\": [],\n", + " \"response_time\": time.time() - start_time\n", + " }\n", + " \n", + " \n", + "\"\"\"\n", + "AgentCore observability tool extraction using gen_ai.tool.name and tool.status.\n", + "\"\"\"\n", + "\n", + "\n", + "def extract_tool_calls_from_agentcore_observability(response_obj, response_text: str = \"\") -> List[str]:\n", + " \"\"\"Extract tool calls using AgentCore observability gen_ai.tool.name and tool.status.\"\"\"\n", + " \n", + " # Extract session ID from headers\n", + " session_id = None\n", + " if hasattr(response_obj, 'headers'):\n", + " headers = dict(response_obj.headers)\n", + " session_id = headers.get('X-Amzn-Bedrock-AgentCore-Runtime-Session-Id')\n", + " if not session_id and 'baggage' in headers:\n", + " baggage = headers['baggage']\n", + " if 'session.id=' in baggage:\n", + " session_id = baggage.split('session.id=')[1].split(',')[0]\n", + " \n", + " tools = []\n", + " \n", + " # Query X-Ray for gen_ai.tool.name spans\n", + " if session_id:\n", + " try:\n", + " xray_client = boto3.client('xray')\n", + " \n", + " # Get traces with gen_ai annotations\n", + " response = xray_client.get_trace_summaries(\n", + " TimeRangeType='Service',\n", + " StartTime=time.time() - 300,\n", + " EndTime=time.time(),\n", + " ServiceName='bedrock-agentcore'\n", + " )\n", + " \n", + " for trace_summary in 
response.get('TraceSummaries', []):\n", + " trace_response = xray_client.batch_get_traces(TraceIds=[trace_summary['Id']])\n", + " \n", + " for trace in trace_response.get('Traces', []):\n", + " for segment in trace.get('Segments', []):\n", + " segment_doc = json.loads(segment['Document'])\n", + " \n", + " # Check for gen_ai.tool.name in annotations\n", + " annotations = segment_doc.get('annotations', {})\n", + " if 'gen_ai.tool.name' in annotations:\n", + " tool_name = annotations['gen_ai.tool.name']\n", + " tool_status = annotations.get('tool.status', 'success')\n", + " \n", + " # Only include successful tool calls\n", + " if tool_status in ['success', 'completed']:\n", + " tools.append(tool_name)\n", + " \n", + " # Check subsegments\n", + " for subsegment in segment_doc.get('subsegments', []):\n", + " sub_annotations = subsegment.get('annotations', {})\n", + " if 'gen_ai.tool.name' in sub_annotations:\n", + " tool_name = sub_annotations['gen_ai.tool.name']\n", + " tool_status = sub_annotations.get('tool.status', 'success')\n", + " \n", + " if tool_status in ['success', 'completed']:\n", + " tools.append(tool_name)\n", + " \n", + " except Exception as e:\n", + " print(f\"X-Ray observability extraction failed: {e}\")\n", + " \n", + " # Fallback to content analysis if no observability data\n", + " if not tools and response_text:\n", + " if any(phrase in response_text.lower() for phrase in [\n", + " \"return policy\", \"30-day return\", \"refunds typically process\"\n", + " ]):\n", + " tools.append(\"get_return_policy\")\n", + " \n", + " if any(phrase in response_text.lower() for phrase in [\n", + " \"gaming console pro\", \"specifications\", \"technical details\"\n", + " ]):\n", + " tools.append(\"get_product_info\")\n", + " \n", + " return list(set(tools))\n", + " \n", + "\n", + "def extract_tool_calls(response_obj, response_text: str = \"\") -> List[str]:\n", + " \"\"\"Extract tool calls using AgentCore gen_ai.tool.name observability.\"\"\"\n", + " \n", + " # Extract session ID from headers\n", + " session_id = None\n", + " if hasattr(response_obj, 'headers'):\n", + " headers = dict(response_obj.headers)\n", + " session_id = headers.get('X-Amzn-Bedrock-AgentCore-Runtime-Session-Id')\n", + " \n", + " tools = []\n", + " \n", + " # Query X-Ray for gen_ai.tool.name spans\n", + " if session_id:\n", + " try:\n", + " xray_client = boto3.client('xray')\n", + " response = xray_client.get_trace_summaries(\n", + " TimeRangeType='Service',\n", + " StartTime=time.time() - 300,\n", + " EndTime=time.time(),\n", + " ServiceName='bedrock-agentcore'\n", + " )\n", + " \n", + " for trace_summary in response.get('TraceSummaries', []):\n", + " trace_response = xray_client.batch_get_traces(TraceIds=[trace_summary['Id']])\n", + " \n", + " for trace in trace_response.get('Traces', []):\n", + " for segment in trace.get('Segments', []):\n", + " segment_doc = json.loads(segment['Document'])\n", + " \n", + " # Check annotations for gen_ai.tool.name\n", + " annotations = segment_doc.get('annotations', {})\n", + " if 'gen_ai.tool.name' in annotations and annotations.get('tool.status') == 'success':\n", + " tools.append(annotations['gen_ai.tool.name'])\n", + " \n", + " except Exception:\n", + " pass\n", + " \n", + " # Fallback to content analysis\n", + " if not tools and response_text:\n", + " if \"return policy\" in response_text.lower():\n", + " tools.append(\"get_return_policy\")\n", + " if \"gaming console pro\" in response_text.lower():\n", + " tools.append(\"get_product_info\")\n", + " \n", + " return 
list(set(tools))\n", + "\n", + "\n", + "print(\"✅ Agent invocation functions defined\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cognito_config # Ensure the output matches what you see from lab-04" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "async def evaluate_response_quality(query: str, response: str, criteria: Dict[str, Any]) -> Dict[str, float]:\n", + "    \"\"\"Evaluate response quality using Claude as judge\"\"\"\n", + "    \n", + "    evaluation_prompt = f\"\"\"\n", + "    You are an expert evaluator for customer support AI agents. Evaluate the following response on a scale of 1-5 for each metric.\n", + "\n", + "    Customer Query: {query}\n", + "    Agent Response: {response}\n", + "\n", + "    Evaluate on these metrics (1=Poor, 2=Below Average, 3=Average, 4=Good, 5=Excellent):\n", + "\n", + "    1. HELPFULNESS: Does the response address the customer's needs and provide useful information?\n", + "    2. ACCURACY: Is the information provided factually correct and reliable?\n", + "    3. CLARITY: Is the response clear, well-structured, and easy to understand?\n", + "    4. PROFESSIONALISM: Does the response maintain appropriate tone and professionalism?\n", + "    5. COMPLETENESS: Does the response fully address all aspects of the query?\n", + "\n", + "    Expected criteria: {json.dumps(criteria, indent=2)}\n", + "\n", + "    Respond with ONLY a JSON object in this format:\n", + "    {{\n", + "        \"helpfulness\": <score 1-5>,\n", + "        \"accuracy\": <score 1-5>,\n", + "        \"clarity\": <score 1-5>,\n", + "        \"professionalism\": <score 1-5>,\n", + "        \"completeness\": <score 1-5>,\n", + "        \"reasoning\": \"Brief explanation of scores\"\n", + "    }}\n", + "    \"\"\"\n", + "    \n", + "    try:\n", + "        response_obj = bedrock.invoke_model(\n", + "            modelId=EVALUATOR_MODEL,\n", + "            body=json.dumps({\n", + "                \"anthropic_version\": \"bedrock-2023-05-31\",\n", + "                \"max_tokens\": 1000,\n", + "                \"messages\": [\n", + "                    {\"role\": \"user\", \"content\": evaluation_prompt}\n", + "                ]\n", + "            })\n", + "        )\n", + "        \n", + "        result = json.loads(response_obj['body'].read())\n", + "        content = result['content'][0]['text']\n", + "        \n", + "        # Extract JSON from response\n", + "        start_idx = content.find('{')\n", + "        end_idx = content.rfind('}') + 1\n", + "        json_str = content[start_idx:end_idx]\n", + "        \n", + "        scores = json.loads(json_str)\n", + "        return {k: v for k, v in scores.items() if k != \"reasoning\"}\n", + "    \n", + "    except Exception as e:\n", + "        print(f\"Error in quality evaluation: {e}\")\n", + "        return {\n", + "            \"helpfulness\": 0.0,\n", + "            \"accuracy\": 0.0,\n", + "            \"clarity\": 0.0,\n", + "            \"professionalism\": 0.0,\n", + "            \"completeness\": 0.0\n", + "        }\n", + "\n", + "def evaluate_tool_usage(expected_tools: List[str], actual_tools: List[str]) -> float:\n", + "    \"\"\"Evaluate tool usage effectiveness\"\"\"\n", + "    if not expected_tools:\n", + "        return 5.0 if not actual_tools else 3.0\n", + "    \n", + "    if not actual_tools:\n", + "        print(f\"Expected tools {expected_tools}, while actual tools {actual_tools}\")\n", + "        return 0.0  # Return 0 if tools expected but none called\n", + "    \n", + "    expected_set = set(expected_tools)\n", + "    actual_set = set(actual_tools)\n", + "    \n", + "    precision = len(expected_set.intersection(actual_set)) / len(actual_set) if actual_set else 0\n", + "    recall = len(expected_set.intersection(actual_set)) / len(expected_set) if expected_set else 0\n", + "    \n", + "    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 
else 0\n", + "    return f1 * 5  # Scale to 0-5\n", + "\n", + "print(\"✅ Evaluation functions defined\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Single Test Case Evaluation" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "async def evaluate_test_case(test_case: TestCase) -> EvaluationResult:\n", + "    \"\"\"Evaluate a single test case\"\"\"\n", + "    print(f\"🔍 Evaluating: {test_case.id} - {test_case.description}\")\n", + "    \n", + "    # Invoke agent\n", + "    agent_result = await invoke_agent(test_case.query)\n", + "    \n", + "    # Handle None result\n", + "    if agent_result is None:\n", + "        return EvaluationResult(\n", + "            test_case_id=test_case.id,\n", + "            query=test_case.query,\n", + "            response=\"\",\n", + "            metrics={},\n", + "            response_time=0.0,\n", + "            success=False,\n", + "            error_message=\"invoke_agent returned None\"\n", + "        )\n", + "    \n", + "    if not agent_result[\"success\"]:\n", + "        return EvaluationResult(\n", + "            test_case_id=test_case.id,\n", + "            query=test_case.query,\n", + "            response=\"\",\n", + "            metrics={},\n", + "            response_time=agent_result[\"response_time\"],\n", + "            success=False,\n", + "            error_message=agent_result.get(\"error_message\", agent_result[\"response\"])  # invoke_agent's failure dict carries the error text in \"response\"\n", + "        )\n", + "    \n", + "    # Evaluate response quality\n", + "    quality_scores = await evaluate_response_quality(\n", + "        test_case.query,\n", + "        agent_result[\"response\"],\n", + "        test_case.expected_criteria\n", + "    )\n", + "    \n", + "    # Evaluate tool usage\n", + "    tool_score = evaluate_tool_usage(\n", + "        test_case.expected_tools,\n", + "        agent_result[\"tool_calls\"]\n", + "    )\n", + "    \n", + "    # Combine all metrics\n", + "    metrics = {\n", + "        **quality_scores,\n", + "        \"tool_usage\": tool_score,\n", + "        \"response_time\": agent_result[\"response_time\"]\n", + "    }\n", + "    \n", + "    return EvaluationResult(\n", + "        test_case_id=test_case.id,\n", + "        query=test_case.query,\n", + "        response=agent_result[\"response\"],\n", + "        metrics=metrics,\n", + "        response_time=agent_result[\"response_time\"],\n", + "        success=True,\n", + "        tool_calls=agent_result[\"tool_calls\"]\n", + "    )\n", + "\n", + "print(\"✅ Test case evaluation function defined\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Run Single Test Case (Demo)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Test a single case first\n", + "demo_test = TEST_CASES[0]  # Basic greeting\n", + "demo_result = await evaluate_test_case(demo_test)\n", + "\n", + "print(f\"\\n📊 Demo Result for '{demo_test.id}':\")\n", + "print(f\"Query: {demo_result.query}\")\n", + "print(f\"Response: {demo_result.response[:200]}...\" if len(demo_result.response) > 200 else f\"Response: {demo_result.response}\")\n", + "print(f\"Response Time: {demo_result.response_time:.3f}s\")\n", + "print(f\"Tool Calls: {demo_result.tool_calls}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Full Evaluation Suite" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "async def run_full_evaluation(test_cases: List[TestCase]) -> Dict[str, Any]:\n", + "    \"\"\"Run evaluation on all test cases\"\"\"\n", + "    print(f\"🚀 Starting evaluation of {len(test_cases)} test cases...\")\n", + "    \n", + "    results = []\n", + "    for i, test_case in enumerate(test_cases, 1):\n", + "        print(f\"\\n[{i}/{len(test_cases)}] Processing: {test_case.id}\")\n", + "        result = await 
evaluate_test_case(test_case)\n", + " results.append(result)\n", + " \n", + " # Brief pause between tests\n", + " await asyncio.sleep(1)\n", + " \n", + " # Calculate summary statistics\n", + " summary = calculate_summary(results)\n", + " \n", + " return {\n", + " \"agent_name\": AGENT_NAME,\n", + " \"total_test_cases\": len(test_cases),\n", + " \"results\": [result.to_dict() for result in results],\n", + " \"summary\": summary,\n", + " \"timestamp\": datetime.now().isoformat()\n", + " }\n", + "\n", + "def calculate_summary(results: List[EvaluationResult]) -> Dict[str, Any]:\n", + " \"\"\"Calculate summary statistics\"\"\"\n", + " successful_results = [r for r in results if r.success]\n", + " \n", + " if not successful_results:\n", + " return {\"error\": \"No successful test cases\"}\n", + " \n", + " # Average scores\n", + " metrics = [\"helpfulness\", \"accuracy\", \"clarity\", \"professionalism\", \"completeness\", \"tool_usage\"]\n", + " avg_scores = {}\n", + " \n", + " for metric in metrics:\n", + " scores = [r.metrics.get(metric, 0) for r in successful_results if metric in r.metrics]\n", + " avg_scores[metric] = sum(scores) / len(scores) if scores else 0\n", + " \n", + " # Response time statistics\n", + " response_times = sorted([r.response_time for r in successful_results])\n", + " n = len(response_times)\n", + " \n", + " percentiles = {\n", + " \"p50\": response_times[n//2] if n > 0 else 0,\n", + " \"p90\": response_times[int(n*0.9)] if n > 0 else 0,\n", + " \"p95\": response_times[int(n*0.95)] if n > 0 else 0,\n", + " \"p99\": response_times[int(n*0.99)] if n > 0 else 0,\n", + " }\n", + " \n", + " return {\n", + " \"success_rate\": len(successful_results) / len(results),\n", + " \"average_scores\": avg_scores,\n", + " \"overall_score\": sum(avg_scores.values()) / len(avg_scores) if avg_scores else 0,\n", + " \"response_time_percentiles\": percentiles,\n", + " \"total_successful\": len(successful_results),\n", + " \"total_failed\": len(results) - len(successful_results)\n", + " }\n", + "\n", + "def safe_score_calculation(metrics):\n", + " numeric_values = [v for k, v in metrics.items() \n", + " if k != 'response_time' and isinstance(v, (int, float))]\n", + " return sum(numeric_values) / len(numeric_values) if numeric_values else 0\n", + "\n", + "print(\"✅ Full evaluation functions defined\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run full evaluation\n", + "evaluation_results = await run_full_evaluation(TEST_CASES)\n", + "\n", + "print(\"\\n\" + \"=\"*60)\n", + "print(\"📊 EVALUATION COMPLETE\")\n", + "print(\"=\"*60)\n", + "\n", + "# Display LLM Judge scores for each test\n", + "print(\"\\n🤖 LLM JUDGE SCORES BY TEST CASE:\")\n", + "print(\"-\" * 60)\n", + "for result in evaluation_results.get(\"results\", []):\n", + " if result[\"success\"] and result.get(\"metrics\"):\n", + " metrics = result[\"metrics\"]\n", + " print(f\"\\n📝 {result['test_case_id'].upper()}:\")\n", + " print(f\" Helpfulness: {metrics.get('helpfulness', 0):.1f}/5.0\")\n", + " print(f\" Accuracy: {metrics.get('accuracy', 0):.1f}/5.0\")\n", + " print(f\" Clarity: {metrics.get('clarity', 0):.1f}/5.0\")\n", + " print(f\" Professionalism: {metrics.get('professionalism', 0):.1f}/5.0\")\n", + " print(f\" Completeness: {metrics.get('completeness', 0):.1f}/5.0\")\n", + " print(f\" Tool Usage: {metrics.get('tool_usage', 0):.1f}/5.0\")\n", + " avg_score = safe_score_calculation(metrics)\n", + " else:\n", + " print(f\"\\n❌ 
{result['test_case_id'].upper()}: FAILED\")\n", + "        print(f\"   Error: {result.get('error_message', 'Unknown error')}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Results Analysis and Visualization" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def print_detailed_summary(results: dict):\n", + "    \"\"\"Print comprehensive evaluation summary\"\"\"\n", + "    summary = results.get(\"summary\", {})\n", + "    \n", + "    print(f\"🤖 Agent: {results['agent_name']}\")\n", + "    print(f\"📝 Total Test Cases: {results['total_test_cases']}\")\n", + "    print(f\"✅ Success Rate: {summary.get('success_rate', 0):.1%}\")\n", + "    print(f\"🎯 Overall Score: {summary.get('overall_score', 0):.2f}/5.0\")\n", + "    \n", + "    print(\"\\n📈 QUALITY METRICS (1-5 scale):\")\n", + "    avg_scores = summary.get(\"average_scores\", {})\n", + "    for metric, score in avg_scores.items():\n", + "        if metric != \"response_time\":\n", + "            emoji = \"🟢\" if score >= 4.0 else \"🟡\" if score >= 3.0 else \"🔴\"\n", + "            print(f\"   {emoji} {metric.title()}: {score:.2f}\")\n", + "    \n", + "    print(\"\\n⏱️ RESPONSE TIME PERCENTILES:\")\n", + "    percentiles = summary.get(\"response_time_percentiles\", {})\n", + "    for p, time_val in percentiles.items():\n", + "        print(f\"   {p.upper()}: {time_val:.3f}s\")\n", + "    \n", + "    print(\"\\n📋 DETAILED RESULTS:\")\n", + "    for result in results.get(\"results\", []):\n", + "        status = \"✅\" if result[\"success\"] else \"❌\"\n", + "        if result[\"success\"] and result.get(\"metrics\"):\n", + "            score = safe_score_calculation(result[\"metrics\"])\n", + "            print(f\"   {status} {result['test_case_id']}: {score:.2f}/5.0 ({result['response_time']:.3f}s)\")\n", + "        else:\n", + "            print(f\"   {status} {result['test_case_id']}: FAILED - {result.get('error_message', 'Unknown error')}\")\n", + "\n", + "# Print detailed summary\n", + "print_detailed_summary(evaluation_results)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Save Results" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Save results to file\n", + "output_file = f\"evaluation_results_{AGENT_NAME}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json\"\n", + "\n", + "with open(output_file, 'w') as f:\n", + "    json.dump(evaluation_results, f, indent=2)\n", + "\n", + "print(f\"💾 Results saved to: {output_file}\")" ] } ], "metadata": { "kernelspec": { "display_name": ".parent_venv (3.12.4)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.4" } }, "nbformat": 4, "nbformat_minor": 4 } diff --git a/01-tutorials/07-AgentCore-E2E/lab_helpers/utils.py b/01-tutorials/07-AgentCore-E2E/lab_helpers/utils.py index 4d687288..e3596c9b 100644 --- a/01-tutorials/07-AgentCore-E2E/lab_helpers/utils.py +++ b/01-tutorials/07-AgentCore-E2E/lab_helpers/utils.py @@ -333,6 +333,17 @@ def cleanup_cognito_resources(pool_id): return False +def get_existing_cognito_config(): +    """Get the existing Cognito configuration used in lab-04""" +    try: +        secret_value = get_customer_support_secret() +        if secret_value: +            return json.loads(secret_value) +        return None +    except Exception as e: +        print(f"Error getting 
existing Cognito config: {e}") + return None + def reauthenticate_user(client_id, client_secret): boto_session = Session() region = boto_session.region_name
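To sanity-check `get_existing_cognito_config()` against the deployed runtime, the sketch below condenses the notebook's `invoke_agent()` flow into a standalone script. It is a minimal sketch, assuming the lab-04 prerequisites are in place: the runtime ARN stored at `/app/customersupport/agentcore/runtime_arn`, the Cognito client credentials readable via `get_customer_support_secret()`, and an agent deployed in `us-east-1`.

```python
# Minimal smoke test mirroring the notebook's invoke_agent() flow.
# Assumption: lab-04 has been run, so the SSM parameter and Cognito secret exist.
import json
import urllib.parse
import uuid

import boto3
import requests

from lab_helpers.utils import get_existing_cognito_config, reauthenticate_user

REGION_NAME = "us-east-1"  # assumption: same region as lab-04

# Resolve the deployed AgentCore runtime ARN from SSM.
ssm = boto3.client("ssm", region_name=REGION_NAME)
agent_arn = ssm.get_parameter(
    Name="/app/customersupport/agentcore/runtime_arn", WithDecryption=True
)["Parameter"]["Value"]

# Fetch the stored Cognito config and mint a fresh bearer token.
cognito_config = get_existing_cognito_config()
token = reauthenticate_user(
    cognito_config["client_id"], cognito_config["client_secret"]
)

# Invoke the runtime over HTTPS, as the notebook does.
url = (
    f"https://bedrock-agentcore.{REGION_NAME}.amazonaws.com/"
    f"runtimes/{urllib.parse.quote(agent_arn, safe='')}/invocations?qualifier=DEFAULT"
)
response = requests.post(
    url,
    headers={
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "X-Amzn-Bedrock-AgentCore-Runtime-Session-Id": f"eval-session-{uuid.uuid4()}",
    },
    data=json.dumps({"prompt": "What is your return policy for electronics?"}),
)
print(response.status_code)
print(response.json() if response.ok else response.text[:500])
```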