// getBraintrustExperimentSummary.ts
import { init } from "mongodb-rag-core/braintrust";
/**
 * Arguments accepted by `getBraintrustExperimentSummary`.
 */
export interface GetBraintrustExperimentSummary {
  /** Name of the experiment; passed to `init` as the `experiment` option. */
  experimentName: string;
  /** Name of the project; passed to `init` as its first argument. */
  projectName: string;
  /**
   * Braintrust API key; passed to `init` and sent as the `Bearer` token on the
   * REST API requests.
   */
  apiKey: string;
}
/**
 * Fetches the metadata record and the score summary of a Braintrust experiment.
 *
 * The experiment is first resolved by name through `init` to obtain its id;
 * the id is then used to query the Braintrust REST API for the experiment
 * record and for its summary. The two REST requests are independent of each
 * other and are issued in parallel.
 *
 * @param params - Project name, experiment name, and API key.
 * @returns An object `{ metadata, summary }` holding the parsed JSON bodies of
 *   the experiment endpoint and the summarize endpoint, respectively.
 * @throws Error if either REST request responds with a non-2xx status, so that
 *   an error payload is never returned as if it were experiment data.
 */
export async function getBraintrustExperimentSummary({
  projectName,
  experimentName,
  apiKey,
}: GetBraintrustExperimentSummary): Promise<unknown> {
  // NOTE(review): `open: true` presumably opens an existing experiment instead
  // of creating a new one — confirm against the Braintrust SDK documentation.
  const experiment = await init(projectName, {
    experiment: experimentName,
    apiKey,
    open: true,
  });
  const id = await experiment.id;
  const experimentUrl = `https://api.braintrust.dev/v1/experiment/${id}`;

  // NOTE(review): the experiment is passed as its own comparison experiment
  // (`comparison_experiment_id` equals the experiment id) — confirm this is
  // intended.
  const summaryUrl = `${experimentUrl}/summarize?summarize_scores=true&comparison_experiment_id=${id}`;

  const [metadata, summary] = await Promise.all([
    fetchBraintrustJson<GetExperimentMetadataResponse>(experimentUrl, apiKey),
    fetchBraintrustJson<GetExperimentSummaryResponse>(summaryUrl, apiKey),
  ]);
  return { metadata, summary };
}

/**
 * Performs an authenticated GET against the Braintrust REST API and parses the
 * JSON response body.
 *
 * @param url - Absolute URL of the endpoint to call.
 * @param apiKey - API key sent as `Authorization: Bearer <apiKey>`.
 * @returns The parsed JSON body, asserted (not validated) to be of type `T`.
 * @throws Error if the response status is not in the 2xx range.
 */
async function fetchBraintrustJson<T>(url: string, apiKey: string): Promise<T> {
  const response = await fetch(url, {
    headers: {
      Authorization: `Bearer ${apiKey}`,
    },
  });
  if (!response.ok) {
    throw new Error(
      `Braintrust API request failed: ${response.status} ${response.statusText} (${url})`
    );
  }
  return (await response.json()) as T;
}
// ---
// Types from the Braintrust API docs
// ---
/**
 * Snapshot of the repository state taken when the experiment was created.
 */
export type RepoInfo = {
  /** SHA of the most recent commit. */
  commit?: string | null;
  /** Branch that the most recent commit belongs to. */
  branch?: string | null;
  /** Tag on the most recent commit. */
  tag?: string | null;
  /** Whether the repo had uncommitted changes when it was snapshotted. */
  dirty?: boolean | null;
  /** Author name of the most recent commit. */
  author_name?: string | null;
  /** Author email of the most recent commit. */
  author_email?: string | null;
  /** Message of the most recent commit. */
  commit_message?: string | null;
  /** Time of the most recent commit. */
  commit_time?: string | null;
  /**
   * Diff between the current state of the repo and the most recent commit;
   * included if the repo was dirty when run.
   */
  git_diff?: string | null;
};
/**
 * Shape of the experiment record returned by the Braintrust experiment
 * endpoint, as described in the Braintrust API docs.
 */
export interface GetExperimentMetadataResponse {
  /** Unique identifier of the experiment. */
  id: string;
  /** Unique identifier of the project the experiment belongs under. */
  project_id: string;
  /** Experiment name; experiment names are unique within a project. */
  name: string;
  /** Textual description of the experiment. */
  description?: string | null;
  /** Date the experiment was created. */
  created?: string | null;
  repo_info?: RepoInfo;
  /** Commit, taken directly from `repo_info.commit`. */
  commit?: string | null;
  /**
   * Id of the default base experiment to compare against when viewing this
   * experiment.
   */
  base_exp_id?: string | null;
  /** Date the experiment was deleted, or null if it is still active. */
  deleted_at?: string | null;
  /**
   * Identifier of the linked dataset, or null if the experiment is not linked
   * to a dataset.
   */
  dataset_id?: string | null;
  /**
   * Version number of the linked dataset the experiment was run against. This
   * can be used to reproduce the experiment after the dataset has been
   * modified.
   */
  dataset_version?: string | null;
  /**
   * Whether the experiment is public. Public experiments can be viewed by
   * anybody inside or outside the organization.
   */
  public: boolean;
  /** Identifies the user who created the experiment. */
  user_id?: string | null;
  /** User-controlled metadata about the experiment. */
  metadata?: Record<string, Record<string, unknown>> | null;
}
/**
 * Summary of an experiment.
 */
export interface GetExperimentSummaryResponse {
  /** Name of the project the experiment belongs to. */
  project_name: string;
  /** Name of the experiment. */
  experiment_name: string;
  /** URL of the project's page in the Braintrust app. */
  project_url: string;
  /** URL of the experiment's page in the Braintrust app. */
  experiment_url: string;
  /** The experiment that scores are baselined against. */
  comparison_experiment_name?: string | null;
  /** Summary of the experiment's scores. */
  scores?: Record<string, ScoreSummary> | null;
  /** Summary of the experiment's metrics. */
  metrics?: Record<string, MetricSummary> | null;
}
/**
 * Summary of how a single score performed.
 */
export interface ScoreSummary {
  /** Name of the score. */
  name: string;
  /** Average score across all examples. */
  score: number;
  /** Score difference between the current and the comparison experiment. */
  diff?: number;
  /** Number of improvements in the score. */
  improvements: number;
  /** Number of regressions in the score. */
  regressions: number;
}
/**
 * Summary of how a single metric performed.
 */
export interface MetricSummary {
  /** Name of the metric. */
  name: string;
  /** Average metric across all examples. */
  metric: number;
  /** Unit label for the metric. */
  unit: string;
  /** Metric difference between the current and the comparison experiment. */
  diff?: number;
  /** Number of improvements in the metric. */
  improvements: number;
  /** Number of regressions in the metric. */
  regressions: number;
}