# PII Loss Tutorial

In this article, we evaluate TrustAI Guard on a PII dataset to show how this evaluation framework can be applied to other GenAI applications.

Here, we use a PII dataset from Hugging Face. We set the label to positive, since we expect PII to be present in all the rows.

```
import os
import requests
from typing import Optional
import numpy as np
import pandas as pd
from datasets import load_dataset


# Toggle to route traffic through a local debugging proxy.
Proxy = False
if Proxy:  # was `if DEBUG:` — DEBUG is never defined, raising NameError at import
    os.environ['HTTP_PROXY'] = 'http://127.0.0.1:1087'
    os.environ['HTTPS_PROXY'] = 'http://127.0.0.1:1087'


def guard_score(prompt: str, category: str) -> float:
    """Query the TrustAI Guard endpoint and return the score for *category*.

    Args:
        prompt: The text to be scored by the guard model.
        category: Key into the returned ``category_scores`` mapping (e.g. ``"pii"``).

    Returns:
        The score reported by the ``trustai-guard-PII`` model for *category*.

    Raises:
        requests.HTTPError: If the API responds with an error status.
        RuntimeError: If no ``trustai-guard-PII`` entry is present in the response.
    """
    # NOTE(review): relies on the module-level `session` created in __main__;
    # calling this before __main__ runs would raise NameError.
    response = session.post(
        "https://platform.trustai.pro/v1/prompt_guard",
        json={"input": prompt},
        headers={"Authorization": "Bearer sk-trustaijust4demopromptguard"},
    )
    response.raise_for_status()  # surface HTTP failures instead of opaque JSON errors
    for entry in response.json():
        if entry['model'] == 'trustai-guard-PII':
            return entry["results"][0]["category_scores"][category]
    # Previously fell through and returned None implicitly, which made the
    # caller's `score > score_thr` comparison raise TypeError.
    raise RuntimeError("No 'trustai-guard-PII' result found in the Guard response.")


def eval_guard(
    df: pd.DataFrame,
    category: str,
    max_size: Optional[int] = None,
    score_thr: float = 0.5,
):
    """Compute standard detection metrics on *df* for the given category.

    Scores each row's ``text`` with :func:`guard_score`, thresholds the score,
    and compares the prediction against the row's ``label`` column.

    Args:
        df: DataFrame with ``text`` and ``label`` (0/1) columns.
        category: One of the supported Guard categories.
        max_size: If given, only the first *max_size* rows are evaluated.
        score_thr: Decision threshold applied to the guard score.
            (Annotation fixed from ``Optional[float]`` — passing ``None``
            would crash the ``>`` comparison, so ``None`` was never valid.)

    Raises:
        ValueError: If *category* is not a supported endpoint category.
    """
    valid_categories = {"prompt_injection", "jailbreaks", "pii", "prohibited_content"}
    if category not in valid_categories:
        raise ValueError(f"The category {category} does not correspond to an existing endpoint.")

    predictions, labels = [], []

    max_size = max_size if max_size is not None else len(df)
    # Score each row and record the thresholded prediction alongside its label.
    for _, row in df.head(max_size).iterrows():
        predictions.append(guard_score(row.text, category) > score_thr)
        labels.append(row.label)

    predictions = np.array(predictions)
    labels = np.array(labels)

    false_positives = np.sum((predictions == 1) & (labels == 0))
    false_negatives = np.sum((predictions == 0) & (labels == 1))

    print(f"False positives: {false_positives} (total: {len(predictions)})")
    print(f"False negatives: {false_negatives} (total: {len(predictions)})")
    print(f"Accuracy: {np.mean(predictions == labels)}")


if __name__ == '__main__':
    # Shared HTTP session used by guard_score for connection reuse.
    session = requests.Session()

    # Load the PII dataset and normalize it into a (text, label) DataFrame.
    # Every row contains PII, so all labels are positive.
    raw = load_dataset("dddd322/dataleak")
    df = pd.DataFrame(raw["train"])
    df.rename(columns={"input": "text"}, inplace=True)
    df["label"] = 1

    # Preview a handful of samples before running the evaluation.
    for sample in df["text"].head(5):
        print(sample)
        print()

    eval_guard(df, "pii", max_size=100)

```

Depending on the size of the test dataset, the evaluation results will appear after a few seconds.

<figure><img src="https://1038748624-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2FLz6Pxm3BdgxerEfxJbG4%2Fuploads%2FThE1i8ke333w3fuYIcIp%2Fimage.png?alt=media&#x26;token=8eb38072-001a-4e45-90c2-bd2b9a26121c" alt=""><figcaption></figcaption></figure>
