-
Notifications
You must be signed in to change notification settings - Fork 69
/
Copy pathmongodbUniversityAllQuestionBenchmark.ts
69 lines (62 loc) · 2.03 KB
/
mongodbUniversityAllQuestionBenchmark.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import { getOpenAiEndpointAndApiKey, models } from "mongodb-rag-core/models";
import "dotenv/config";
import PromisePool from "@supercharge/promise-pool";
import { runQuizQuestionEval } from "./QuizQuestionEval";
import { getQuizQuestionEvalCasesFromBraintrust } from "./getQuizQuestionEvalCasesFromBraintrust";
import { mongoDbQuizQuestionExamples } from "./mongoDbQuizQuestionExamples";
import { OpenAI } from "mongodb-rag-core/openai";
/**
 * Runs the quiz-question eval once per selected model, in parallel.
 *
 * Steps:
 * 1. Fetch the eval cases for the `university-quiz-badge-questions` dataset in
 *    the `mongodb-multiple-choice` project via
 *    `getQuizQuestionEvalCasesFromBraintrust`.
 * 2. Select the entries of `models` whose `label` is in `modelsToEvaluate`.
 * 3. Call `runQuizQuestionEval` for each selected model. The experiment is
 *    named after the model label, with `?runId=<RUN_ID>` appended when the
 *    `RUN_ID` environment variable is set.
 *
 * A failure in one experiment is logged and does not stop the others. Errors
 * raised while fetching the dataset are not caught here and reject the
 * returned promise.
 */
async function main() {
  const DEFAULT_MAX_CONCURRENCY = 15;
  const { RUN_ID } = process.env;
  const projectName = "mongodb-multiple-choice";
  const datasetName = "university-quiz-badge-questions";

  const data = await getQuizQuestionEvalCasesFromBraintrust({
    projectName,
    datasetName,
  });

  // These were the requested models to evaluate
  const modelsToEvaluate = [
    "gpt-4o",
    "claude-35-sonnet-v2",
    "llama-3.1-70b",
    "nova-pro-v1:0",
    "mistral-large-2",
    "gemini-2-flash",
  ];
  const modelExperiments = models.filter((m) =>
    modelsToEvaluate.includes(m.label)
  );

  // Surface requested labels that have no entry in `models`; otherwise the
  // filter above drops them silently and the benchmark runs on fewer models
  // than intended.
  const configuredLabels = new Set<string>(
    modelExperiments.map((m) => m.label)
  );
  const missingLabels = modelsToEvaluate.filter(
    (label) => !configuredLabels.has(label)
  );
  if (missingLabels.length > 0) {
    console.warn(
      `Skipping requested models with no matching configuration: ${missingLabels.join(
        ", "
      )}`
    );
  }

  // Process models in parallel. Concurrency is sized to the requested list so
  // every selected model starts immediately.
  await PromisePool.for(modelExperiments)
    .withConcurrency(modelsToEvaluate.length)
    .process(async (modelInfo) => {
      let experimentName = modelInfo.label;
      if (RUN_ID) {
        experimentName += `?runId=${RUN_ID}`;
      }
      console.log(`Running experiment: ${experimentName}`);
      try {
        await runQuizQuestionEval({
          projectName,
          model: modelInfo.deployment,
          openaiClient: new OpenAI({
            ...(await getOpenAiEndpointAndApiKey(modelInfo)),
          }),
          experimentName,
          additionalMetadata: {
            ...modelInfo,
          },
          // Per-model limit wins; fall back to the shared default otherwise.
          maxConcurrency: modelInfo.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY,
          data,
          promptOptions: {
            subject: "MongoDB",
            quizQuestionExamples: mongoDbQuizQuestionExamples,
          },
        });
      } catch (err) {
        // Best effort: log which experiment failed and let the rest continue.
        console.error(`Error running Braintrust experiment: ${experimentName}`);
        console.error(err);
      }
    });
}
// Script entry point. Attach a rejection handler so a failure that escapes
// `main` is logged and reflected in a non-zero exit code rather than left as
// a floating promise.
main().catch((err: unknown) => {
  console.error("Benchmark run failed");
  console.error(err);
  process.exitCode = 1;
});