
Add Tools for Managing Agent Traces in Hugging Face Datasets #217


Open · wants to merge 11 commits into main
5 changes: 5 additions & 0 deletions src/agentlab/llm/traces/config.py
@@ -0,0 +1,5 @@
HF_USERNAME = "your_username"
HF_INDEX_DATASET = "your_username/agent_traces_index"
HF_TRACE_DATASET = "your_username/agent_traces_data"
WHITELISTED_BENCHMARKS = ["benchmark1", "benchmark2"]

29 changes: 29 additions & 0 deletions src/agentlab/llm/traces/query.py
@@ -0,0 +1,29 @@
from datasets import load_dataset
import requests

# Hugging Face dataset name for the index
INDEX_DATASET = "your_username/agent_traces_index"

# Query the index for traces matching an LLM and/or a benchmark
def query_traces(llm=None, benchmark=None):
    dataset = load_dataset(INDEX_DATASET, split="train")
    df = dataset.to_pandas()

    if llm:
        df = df[df["llm"] == llm]
    if benchmark:
        df = df[df["benchmark"] == benchmark]

    return df[["exp_id", "study_name", "trace_pointer"]].to_dict(orient="records")

# Download the trace archive associated with an exp_id
def download_trace(exp_id: str, save_path: str):
    dataset = load_dataset(INDEX_DATASET, split="train")
    df = dataset.to_pandas()
    matches = df[df["exp_id"] == exp_id]["trace_pointer"].values
    if len(matches) == 0:
        raise ValueError(f"No trace found for exp_id {exp_id}")
    trace_url = matches[0]

    response = requests.get(trace_url)
    response.raise_for_status()  # fail fast on HTTP errors instead of writing an error page to disk
    with open(save_path, "wb") as f:
        f.write(response.content)
    print(f"Downloaded trace {exp_id} to {save_path}")

57 changes: 57 additions & 0 deletions src/agentlab/llm/traces/uploads.py
@@ -0,0 +1,57 @@
from datasets import Dataset, load_dataset
from huggingface_hub import HfApi
import pandas as pd

# Hugging Face dataset names
INDEX_DATASET = "/agent_traces_index"
TRACE_DATASET = "/agent_traces_data"
Collaborator: This is a dev version so all good, but eventually we'd switch this to environment variables.

Author: Yeah, I understand. Once this PR is completed, I'll remove them and you can add them in your .env.

I agree, moving the Hugging Face dataset names INDEX_DATASET and TRACE_DATASET to environment variables would improve maintainability and security. It would also help us manage environment-specific settings. Thank you for addressing this.
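
As a rough sketch of the env-variable approach discussed above (the variable names AGENTLAB_HF_INDEX_DATASET and AGENTLAB_HF_TRACE_DATASET are illustrative, not part of this PR):

import os

# Hypothetical variable names; fall back to the current dev placeholders
INDEX_DATASET = os.environ.get("AGENTLAB_HF_INDEX_DATASET", "/agent_traces_index")
TRACE_DATASET = os.environ.get("AGENTLAB_HF_TRACE_DATASET", "/agent_traces_data")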


# Hugging Face API instance
api = HfApi()

def upload_index_data(index_df: pd.DataFrame):
    dataset = Dataset.from_pandas(index_df)
    dataset.push_to_hub(INDEX_DATASET, split="train")

def upload_trace(trace_file: str, exp_id: str):
    api.upload_file(
        path_or_fileobj=trace_file,
        path_in_repo=f"{exp_id}.zip",
        repo_id=TRACE_DATASET,
        repo_type="dataset",
    )
Collaborator: Ideally, we would approve new content on our datasets. Would there be a way to make new uploads into a PR? I'm guessing that might be on the Hugging Face side, in the dataset settings.
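
For what it's worth, huggingface_hub's upload_file accepts a create_pr flag that opens a pull request on the dataset repo instead of committing directly to main. A minimal sketch (whether this fits the moderation workflow here is untested; the file names are illustrative):

from huggingface_hub import HfApi

api = HfApi()
api.upload_file(
    path_or_fileobj="trace.zip",   # local archive to upload
    path_in_repo="exp_123.zip",    # illustrative file name
    repo_id="/agent_traces_data",
    repo_type="dataset",
    create_pr=True,                # open a PR on the dataset instead of pushing to main
)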

Collaborator: It could be interesting to compress the file if needed, instead of requiring it to be zipped already.

Author: Got it.
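
A possible sketch of that idea using only the standard library (the compress-if-directory behavior is an assumption, not something this PR implements):

import os
import shutil

def ensure_zipped(path: str) -> str:
    """Return a path to a .zip archive, compressing a directory if needed."""
    if path.endswith(".zip"):
        return path  # already an archive
    if os.path.isdir(path):
        # shutil.make_archive appends the .zip extension itself
        return shutil.make_archive(path, "zip", root_dir=path)
    raise ValueError(f"Expected a .zip file or a directory, got {path}")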


def add_study(exp_id: str, study_name: str, llm: str, benchmark: str, trace_file: str):
    # Check if the benchmark is whitelisted
    WHITELISTED_BENCHMARKS = ["benchmark1", "benchmark2"]
    if benchmark not in WHITELISTED_BENCHMARKS:
        raise ValueError("Benchmark not whitelisted")

    # Assign a license based on LLM and benchmark
    LICENSES = {
        ("GPT-4", "benchmark1"): "MIT",
        ("Llama2", "benchmark2"): "Apache-2.0",
    }
    license_type = LICENSES.get((llm, benchmark), "Unknown")

    # Upload trace file
    upload_trace(trace_file, exp_id)

    # Create metadata entry
    index_entry = {
        "exp_id": exp_id,
        "study_name": study_name,
        "llm": llm,
        "benchmark": benchmark,
        "license": license_type,
        "trace_pointer": f"https://huggingface.co/datasets/{TRACE_DATASET}/resolve/main/{exp_id}.zip",
    }

    # Load the existing index dataset and add the new entry
    dataset = load_dataset(INDEX_DATASET, split="train")
Collaborator: I'm having issues on this line when trying to test things on my side, as the dataset version that's online is empty. Would there be a way to initialize it first?

Author: I guess we should have a test dataset online to test the functionality.

That's a valid concern, RohitP2005. Having an online test dataset would indeed help us verify the functionality in real-world settings. Would it be possible to initialize and upload a minimal test dataset that we could use for these purposes? We can involve the team in generating sample data if needed.

    df = dataset.to_pandas()
    # DataFrame.append was removed in pandas 2.0; append the new row with concat instead
    df = pd.concat([df, pd.DataFrame([index_entry])], ignore_index=True)
    upload_index_data(df)

    print(f"Study {exp_id} added successfully!")