From d84d8e8906e94512644fcbc0fe1a969ac2bd61fb Mon Sep 17 00:00:00 2001 From: gsiddh <92327772+gsiddh@users.noreply.github.com> Date: Tue, 15 Apr 2025 15:35:13 -0700 Subject: [PATCH 01/14] Vaihi add langmodel types. (#8927) * Adding LanguageModel types. These are based off https://github.com/webmachinelearning/prompt-api?tab=readme-ov-file#full-api-surface-in-web-idl * Adding LanguageModel types. * Remove bunch of exports * yarn formatted * after lint --- packages/vertexai/src/types/language-model.ts | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 packages/vertexai/src/types/language-model.ts diff --git a/packages/vertexai/src/types/language-model.ts b/packages/vertexai/src/types/language-model.ts new file mode 100644 index 00000000000..5bfb38beea4 --- /dev/null +++ b/packages/vertexai/src/types/language-model.ts @@ -0,0 +1,83 @@ +/** + * @license + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +export interface LanguageModel extends EventTarget { + create(options?: LanguageModelCreateOptions): Promise<LanguageModel>; + availability(options?: LanguageModelCreateCoreOptions): Promise<Availability>; + prompt( + input: LanguageModelPrompt, + options?: LanguageModelPromptOptions + ): Promise<string>; + promptStreaming( + input: LanguageModelPrompt, + options?: LanguageModelPromptOptions + ): ReadableStream; + measureInputUsage( + input: LanguageModelPrompt, + options?: LanguageModelPromptOptions + ): Promise<number>; + destroy(): undefined; +} +enum Availability { + 'unavailable', + 'downloadable', + 'downloading', + 'available' +} +interface LanguageModelCreateCoreOptions { + topK?: number; + temperature?: number; + expectedInputs?: LanguageModelExpectedInput[]; +} +interface LanguageModelCreateOptions extends LanguageModelCreateCoreOptions { + signal?: AbortSignal; + systemPrompt?: string; + initialPrompts?: LanguageModelInitialPrompts; +} +interface LanguageModelPromptOptions { + signal?: AbortSignal; +} +interface LanguageModelExpectedInput { + type: LanguageModelMessageType; + languages?: string[]; +} +type LanguageModelPrompt = + | LanguageModelMessage[] + | LanguageModelMessageShorthand[] + | string; +type LanguageModelInitialPrompts = + | LanguageModelMessage[] + | LanguageModelMessageShorthand[]; +interface LanguageModelMessage { + role: LanguageModelMessageRole; + content: LanguageModelMessageContent[]; +} +interface LanguageModelMessageShorthand { + role: LanguageModelMessageRole; + content: string; +} +interface LanguageModelMessageContent { + type: LanguageModelMessageType; + content: LanguageModelMessageContentValue; +} +type LanguageModelMessageRole = 'system' | 'user' | 'assistant'; +type LanguageModelMessageType = 'text' | 'image' | 'audio'; +type LanguageModelMessageContentValue = + | ImageBitmapSource + | AudioBuffer + | BufferSource + | string; From 94bd80304ff0386060d28bbf1b4b6740afdc0dcc Mon Sep 17 00:00:00 2001 From: gsiddh <92327772+gsiddh@users.noreply.github.com> Date: 
Wed, 16 Apr 2025 11:27:14 -0700 Subject: [PATCH 02/14] Define HybridParams (#8935) Co-authored-by: Erik Eldridge --- common/api-review/vertexai.api.md | 24 ++++++++- docs-devsite/_toc.yaml | 2 + docs-devsite/vertexai.hybridparams.md | 51 +++++++++++++++++++ docs-devsite/vertexai.md | 32 +++++++++--- docs-devsite/vertexai.modelparams.md | 2 +- docs-devsite/vertexai.requestoptions.md | 2 +- packages/vertexai/src/api.ts | 18 +++++-- packages/vertexai/src/types/enums.ts | 10 ++++ packages/vertexai/src/types/language-model.ts | 5 +- packages/vertexai/src/types/requests.ts | 14 ++++- 10 files changed, 145 insertions(+), 15 deletions(-) create mode 100644 docs-devsite/vertexai.hybridparams.md diff --git a/common/api-review/vertexai.api.md b/common/api-review/vertexai.api.md index e7f00c2f4e0..fc7d9182586 100644 --- a/common/api-review/vertexai.api.md +++ b/common/api-review/vertexai.api.md @@ -344,7 +344,7 @@ export class GenerativeModel extends VertexAIModel { } // @public -export function getGenerativeModel(vertexAI: VertexAI, modelParams: ModelParams, requestOptions?: RequestOptions): GenerativeModel; +export function getGenerativeModel(vertexAI: VertexAI, onCloudOrHybridParams: ModelParams | HybridParams, requestOptions?: RequestOptions): GenerativeModel; // @beta export function getImagenModel(vertexAI: VertexAI, modelParams: ImagenModelParams, requestOptions?: RequestOptions): ImagenModel; @@ -416,6 +416,18 @@ export enum HarmSeverity { HARM_SEVERITY_NEGLIGIBLE = "HARM_SEVERITY_NEGLIGIBLE" } +// @public +export interface HybridParams { + // (undocumented) + mode?: InferenceMode; + // (undocumented) + onCloudParams?: ModelParams; + // Warning: (ae-forgotten-export) The symbol "LanguageModelCreateOptions" needs to be exported by the entry point index.d.ts + // + // (undocumented) + onDeviceParams?: LanguageModelCreateOptions; +} + // @beta export enum ImagenAspectRatio { LANDSCAPE_16x9 = "16:9", @@ -500,6 +512,16 @@ export interface ImagenSafetySettings { 
safetyFilterLevel?: ImagenSafetyFilterLevel; } +// @public +export enum InferenceMode { + // (undocumented) + ONLY_ON_CLOUD = "ONLY_ON_CLOUD", + // (undocumented) + ONLY_ON_DEVICE = "ONLY_ON_DEVICE", + // (undocumented) + PREFER_ON_DEVICE = "PREFER_ON_DEVICE" +} + // @public export interface InlineDataPart { // (undocumented) diff --git a/docs-devsite/_toc.yaml b/docs-devsite/_toc.yaml index 665222edb9d..64e24534590 100644 --- a/docs-devsite/_toc.yaml +++ b/docs-devsite/_toc.yaml @@ -536,6 +536,8 @@ toc: path: /docs/reference/js/vertexai.groundingattribution.md - title: GroundingMetadata path: /docs/reference/js/vertexai.groundingmetadata.md + - title: HybridParams + path: /docs/reference/js/vertexai.hybridparams.md - title: ImagenGCSImage path: /docs/reference/js/vertexai.imagengcsimage.md - title: ImagenGenerationConfig diff --git a/docs-devsite/vertexai.hybridparams.md b/docs-devsite/vertexai.hybridparams.md new file mode 100644 index 00000000000..ea175a7234b --- /dev/null +++ b/docs-devsite/vertexai.hybridparams.md @@ -0,0 +1,51 @@ +Project: /docs/reference/js/_project.yaml +Book: /docs/reference/_book.yaml +page_type: reference + +{% comment %} +DO NOT EDIT THIS FILE! +This is generated by the JS SDK team, and any local changes will be +overwritten. Changes should be made in the source code at +https://github.com/firebase/firebase-js-sdk +{% endcomment %} + +# HybridParams interface +Configures on-device and on-cloud inference. 
+ +Signature: + +```typescript +export interface HybridParams +``` + +## Properties + +| Property | Type | Description | +| --- | --- | --- | +| [mode](./vertexai.hybridparams.md#hybridparamsmode) | [InferenceMode](./vertexai.md#inferencemode) | | +| [onCloudParams](./vertexai.hybridparams.md#hybridparamsoncloudparams) | [ModelParams](./vertexai.modelparams.md#modelparams_interface) | | +| [onDeviceParams](./vertexai.hybridparams.md#hybridparamsondeviceparams) | LanguageModelCreateOptions | | + +## HybridParams.mode + +Signature: + +```typescript +mode?: InferenceMode; +``` + +## HybridParams.onCloudParams + +Signature: + +```typescript +onCloudParams?: ModelParams; +``` + +## HybridParams.onDeviceParams + +Signature: + +```typescript +onDeviceParams?: LanguageModelCreateOptions; +``` diff --git a/docs-devsite/vertexai.md b/docs-devsite/vertexai.md index f67254eef20..a3ba28ad609 100644 --- a/docs-devsite/vertexai.md +++ b/docs-devsite/vertexai.md @@ -19,7 +19,7 @@ The Vertex AI in Firebase Web SDK. | function(app, ...) | | [getVertexAI(app, options)](./vertexai.md#getvertexai_04094cf) | Returns a [VertexAI](./vertexai.vertexai.md#vertexai_interface) instance for the given app. | | function(vertexAI, ...) | -| [getGenerativeModel(vertexAI, modelParams, requestOptions)](./vertexai.md#getgenerativemodel_e3037c9) | Returns a [GenerativeModel](./vertexai.generativemodel.md#generativemodel_class) class with methods for inference and other functionality. | +| [getGenerativeModel(vertexAI, onCloudOrHybridParams, requestOptions)](./vertexai.md#getgenerativemodel_202434f) | Returns a [GenerativeModel](./vertexai.generativemodel.md#generativemodel_class) class with methods for inference and other functionality. 
| | [getImagenModel(vertexAI, modelParams, requestOptions)](./vertexai.md#getimagenmodel_812c375) | (Public Preview) Returns an [ImagenModel](./vertexai.imagenmodel.md#imagenmodel_class) class with methods for using Imagen.Only Imagen 3 models (named imagen-3.0-*) are supported. | ## Classes @@ -55,6 +55,7 @@ The Vertex AI in Firebase Web SDK. | [ImagenAspectRatio](./vertexai.md#imagenaspectratio) | (Public Preview) Aspect ratios for Imagen images.To specify an aspect ratio for generated images, set the aspectRatio property in your [ImagenGenerationConfig](./vertexai.imagengenerationconfig.md#imagengenerationconfig_interface).See the the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) for more details and examples of the supported aspect ratios. | | [ImagenPersonFilterLevel](./vertexai.md#imagenpersonfilterlevel) | (Public Preview) A filter level controlling whether generation of images containing people or faces is allowed.See the personGeneration documentation for more details. | | [ImagenSafetyFilterLevel](./vertexai.md#imagensafetyfilterlevel) | (Public Preview) A filter level controlling how aggressively to filter sensitive content.Text prompts provided as inputs and images (generated or uploaded) through Imagen on Vertex AI are assessed against a list of safety filters, which include 'harmful categories' (for example, violence, sexual, derogatory, and toxic). This filter level controls how aggressively to filter out potentially harmful content from responses. See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) and the [Responsible AI and usage guidelines](https://cloud.google.com/vertex-ai/generative-ai/docs/image/responsible-ai-imagen#safety-filters) for more details. | +| [InferenceMode](./vertexai.md#inferencemode) | Determines whether inference happens on-device or on-cloud. | | [Modality](./vertexai.md#modality) | Content part modality. 
| | [SchemaType](./vertexai.md#schematype) | Contains the list of OpenAPI data types as defined by the [OpenAPI specification](https://swagger.io/docs/specification/data-models/data-types/) | | [VertexAIErrorCode](./vertexai.md#vertexaierrorcode) | Standardized error codes that [VertexAIError](./vertexai.vertexaierror.md#vertexaierror_class) can have. | @@ -91,6 +92,7 @@ The Vertex AI in Firebase Web SDK. | [GenerativeContentBlob](./vertexai.generativecontentblob.md#generativecontentblob_interface) | Interface for sending an image. | | [GroundingAttribution](./vertexai.groundingattribution.md#groundingattribution_interface) | | | [GroundingMetadata](./vertexai.groundingmetadata.md#groundingmetadata_interface) | Metadata returned to client when grounding is enabled. | +| [HybridParams](./vertexai.hybridparams.md#hybridparams_interface) | Configures on-device and on-cloud inference. | | [ImagenGCSImage](./vertexai.imagengcsimage.md#imagengcsimage_interface) | An image generated by Imagen, stored in a Cloud Storage for Firebase bucket.This feature is not available yet. | | [ImagenGenerationConfig](./vertexai.imagengenerationconfig.md#imagengenerationconfig_interface) | (Public Preview) Configuration options for generating images with Imagen.See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images-imagen) for more details. | | [ImagenGenerationResponse](./vertexai.imagengenerationresponse.md#imagengenerationresponse_interface) | (Public Preview) The response from a request to generate images with Imagen. | @@ -99,10 +101,10 @@ The Vertex AI in Firebase Web SDK. | [ImagenSafetySettings](./vertexai.imagensafetysettings.md#imagensafetysettings_interface) | (Public Preview) Settings for controlling the aggressiveness of filtering out sensitive content.See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) for more details. 
| | [InlineDataPart](./vertexai.inlinedatapart.md#inlinedatapart_interface) | Content part interface if the part represents an image. | | [ModalityTokenCount](./vertexai.modalitytokencount.md#modalitytokencount_interface) | Represents token counting info for a single modality. | -| [ModelParams](./vertexai.modelparams.md#modelparams_interface) | Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_e3037c9). | +| [ModelParams](./vertexai.modelparams.md#modelparams_interface) | Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_202434f). | | [ObjectSchemaInterface](./vertexai.objectschemainterface.md#objectschemainterface_interface) | Interface for [ObjectSchema](./vertexai.objectschema.md#objectschema_class) class. | | [PromptFeedback](./vertexai.promptfeedback.md#promptfeedback_interface) | If the prompt was blocked, this will be populated with blockReason and the relevant safetyRatings. | -| [RequestOptions](./vertexai.requestoptions.md#requestoptions_interface) | Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_e3037c9). | +| [RequestOptions](./vertexai.requestoptions.md#requestoptions_interface) | Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_202434f). | | [RetrievedContextAttribution](./vertexai.retrievedcontextattribution.md#retrievedcontextattribution_interface) | | | [SafetyRating](./vertexai.safetyrating.md#safetyrating_interface) | A safety rating associated with a [GenerateContentCandidate](./vertexai.generatecontentcandidate.md#generatecontentcandidate_interface) | | [SafetySetting](./vertexai.safetysetting.md#safetysetting_interface) | Safety setting that can be sent as part of request parameters. | @@ -160,14 +162,14 @@ export declare function getVertexAI(app?: FirebaseApp, options?: VertexAIOptions ## function(vertexAI, ...) 
-### getGenerativeModel(vertexAI, modelParams, requestOptions) {:#getgenerativemodel_e3037c9} +### getGenerativeModel(vertexAI, onCloudOrHybridParams, requestOptions) {:#getgenerativemodel_202434f} Returns a [GenerativeModel](./vertexai.generativemodel.md#generativemodel_class) class with methods for inference and other functionality. Signature: ```typescript -export declare function getGenerativeModel(vertexAI: VertexAI, modelParams: ModelParams, requestOptions?: RequestOptions): GenerativeModel; +export declare function getGenerativeModel(vertexAI: VertexAI, onCloudOrHybridParams: ModelParams | HybridParams, requestOptions?: RequestOptions): GenerativeModel; ``` #### Parameters @@ -175,7 +177,7 @@ export declare function getGenerativeModel(vertexAI: VertexAI, modelParams: Mode | Parameter | Type | Description | | --- | --- | --- | | vertexAI | [VertexAI](./vertexai.vertexai.md#vertexai_interface) | | -| modelParams | [ModelParams](./vertexai.modelparams.md#modelparams_interface) | | +| onCloudOrHybridParams | [ModelParams](./vertexai.modelparams.md#modelparams_interface) \| [HybridParams](./vertexai.hybridparams.md#hybridparams_interface) | | | requestOptions | [RequestOptions](./vertexai.requestoptions.md#requestoptions_interface) | | Returns: @@ -489,6 +491,24 @@ export declare enum ImagenSafetyFilterLevel | BLOCK\_NONE | "block_none" | (Public Preview) The least aggressive filtering level; blocks very few sensitive prompts and responses.Access to this feature is restricted and may require your case to be reviewed and approved by Cloud support. | | BLOCK\_ONLY\_HIGH | "block_only_high" | (Public Preview) Blocks few sensitive prompts and responses. | +## InferenceMode + +Determines whether inference happens on-device or on-cloud. 
+ +Signature: + +```typescript +export declare enum InferenceMode +``` + +## Enumeration Members + +| Member | Value | Description | +| --- | --- | --- | +| ONLY\_ON\_CLOUD | "ONLY_ON_CLOUD" | | +| ONLY\_ON\_DEVICE | "ONLY_ON_DEVICE" | | +| PREFER\_ON\_DEVICE | "PREFER_ON_DEVICE" | | + ## Modality Content part modality. diff --git a/docs-devsite/vertexai.modelparams.md b/docs-devsite/vertexai.modelparams.md index d3963d240eb..6645d498d8e 100644 --- a/docs-devsite/vertexai.modelparams.md +++ b/docs-devsite/vertexai.modelparams.md @@ -10,7 +10,7 @@ https://github.com/firebase/firebase-js-sdk {% endcomment %} # ModelParams interface -Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_e3037c9). +Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_202434f). Signature: diff --git a/docs-devsite/vertexai.requestoptions.md b/docs-devsite/vertexai.requestoptions.md index dcd0c552ecb..334ce7956d6 100644 --- a/docs-devsite/vertexai.requestoptions.md +++ b/docs-devsite/vertexai.requestoptions.md @@ -10,7 +10,7 @@ https://github.com/firebase/firebase-js-sdk {% endcomment %} # RequestOptions interface -Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_e3037c9). +Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_202434f). 
Signature: diff --git a/packages/vertexai/src/api.ts b/packages/vertexai/src/api.ts index 7843a5bdeee..323cfd10e80 100644 --- a/packages/vertexai/src/api.ts +++ b/packages/vertexai/src/api.ts @@ -23,6 +23,7 @@ import { VertexAIService } from './service'; import { VertexAI, VertexAIOptions } from './public-types'; import { ImagenModelParams, + HybridParams, ModelParams, RequestOptions, VertexAIErrorCode @@ -70,16 +71,27 @@ export function getVertexAI( */ export function getGenerativeModel( vertexAI: VertexAI, - modelParams: ModelParams, + onCloudOrHybridParams: ModelParams | HybridParams, requestOptions?: RequestOptions ): GenerativeModel { - if (!modelParams.model) { + // Disambiguates onCloudOrHybridParams input. + const hybridParams = onCloudOrHybridParams as HybridParams; + let onCloudParams: ModelParams; + if (hybridParams.mode) { + onCloudParams = hybridParams.onCloudParams || { + model: 'gemini-2.0-flash-lite' + }; + } else { + onCloudParams = onCloudOrHybridParams as ModelParams; + } + + if (!onCloudParams.model) { throw new VertexAIError( VertexAIErrorCode.NO_MODEL, `Must provide a model name. Example: getGenerativeModel({ model: 'my-model-name' })` ); } - return new GenerativeModel(vertexAI, modelParams, requestOptions); + return new GenerativeModel(vertexAI, onCloudParams, requestOptions); } /** diff --git a/packages/vertexai/src/types/enums.ts b/packages/vertexai/src/types/enums.ts index a9481d40f5f..1f81ed79a8f 100644 --- a/packages/vertexai/src/types/enums.ts +++ b/packages/vertexai/src/types/enums.ts @@ -240,3 +240,13 @@ export enum Modality { */ DOCUMENT = 'DOCUMENT' } + +/** + * Determines whether inference happens on-device or on-cloud. 
+ * @public + */ +export enum InferenceMode { + PREFER_ON_DEVICE = 'PREFER_ON_DEVICE', + ONLY_ON_DEVICE = 'ONLY_ON_DEVICE', + ONLY_ON_CLOUD = 'ONLY_ON_CLOUD' +} diff --git a/packages/vertexai/src/types/language-model.ts b/packages/vertexai/src/types/language-model.ts index 5bfb38beea4..e564ca467b4 100644 --- a/packages/vertexai/src/types/language-model.ts +++ b/packages/vertexai/src/types/language-model.ts @@ -38,12 +38,13 @@ enum Availability { 'downloading', 'available' } -interface LanguageModelCreateCoreOptions { +export interface LanguageModelCreateCoreOptions { topK?: number; temperature?: number; expectedInputs?: LanguageModelExpectedInput[]; } -interface LanguageModelCreateOptions extends LanguageModelCreateCoreOptions { +export interface LanguageModelCreateOptions + extends LanguageModelCreateCoreOptions { signal?: AbortSignal; systemPrompt?: string; initialPrompts?: LanguageModelInitialPrompts; diff --git a/packages/vertexai/src/types/requests.ts b/packages/vertexai/src/types/requests.ts index c15258b06d0..c643fcd6084 100644 --- a/packages/vertexai/src/types/requests.ts +++ b/packages/vertexai/src/types/requests.ts @@ -17,11 +17,13 @@ import { TypedSchema } from '../requests/schema-builder'; import { Content, Part } from './content'; +import { LanguageModelCreateOptions } from './language-model'; import { FunctionCallingMode, HarmBlockMethod, HarmBlockThreshold, - HarmCategory + HarmCategory, + InferenceMode } from './enums'; import { ObjectSchemaInterface, SchemaRequest } from './schema'; @@ -213,3 +215,13 @@ export interface FunctionCallingConfig { mode?: FunctionCallingMode; allowedFunctionNames?: string[]; } + +/** + * Configures on-device and on-cloud inference. 
+ * @public + */ +export interface HybridParams { + mode?: InferenceMode; + onDeviceParams?: LanguageModelCreateOptions; + onCloudParams?: ModelParams; +} From 8971cc6aec7d8234b2666cc0b78d51cb94b368ae Mon Sep 17 00:00:00 2001 From: gsiddh <92327772+gsiddh@users.noreply.github.com> Date: Wed, 16 Apr 2025 12:54:10 -0700 Subject: [PATCH 03/14] Adding smoke test for new hybrid params (#8937) * Adding smoke test for new hybrid params * Use the existing name of the model params input --------- Co-authored-by: Erik Eldridge --- common/api-review/vertexai.api.md | 2 +- docs-devsite/vertexai.md | 12 ++++++------ docs-devsite/vertexai.modelparams.md | 2 +- docs-devsite/vertexai.requestoptions.md | 2 +- packages/vertexai/src/api.test.ts | 14 +++++++++++++- packages/vertexai/src/api.ts | 8 ++++---- 6 files changed, 26 insertions(+), 14 deletions(-) diff --git a/common/api-review/vertexai.api.md b/common/api-review/vertexai.api.md index fc7d9182586..5c8ef330cbe 100644 --- a/common/api-review/vertexai.api.md +++ b/common/api-review/vertexai.api.md @@ -344,7 +344,7 @@ export class GenerativeModel extends VertexAIModel { } // @public -export function getGenerativeModel(vertexAI: VertexAI, onCloudOrHybridParams: ModelParams | HybridParams, requestOptions?: RequestOptions): GenerativeModel; +export function getGenerativeModel(vertexAI: VertexAI, modelParams: ModelParams | HybridParams, requestOptions?: RequestOptions): GenerativeModel; // @beta export function getImagenModel(vertexAI: VertexAI, modelParams: ImagenModelParams, requestOptions?: RequestOptions): ImagenModel; diff --git a/docs-devsite/vertexai.md b/docs-devsite/vertexai.md index a3ba28ad609..305d0f09b61 100644 --- a/docs-devsite/vertexai.md +++ b/docs-devsite/vertexai.md @@ -19,7 +19,7 @@ The Vertex AI in Firebase Web SDK. | function(app, ...) | | [getVertexAI(app, options)](./vertexai.md#getvertexai_04094cf) | Returns a [VertexAI](./vertexai.vertexai.md#vertexai_interface) instance for the given app. 
| | function(vertexAI, ...) | -| [getGenerativeModel(vertexAI, onCloudOrHybridParams, requestOptions)](./vertexai.md#getgenerativemodel_202434f) | Returns a [GenerativeModel](./vertexai.generativemodel.md#generativemodel_class) class with methods for inference and other functionality. | +| [getGenerativeModel(vertexAI, modelParams, requestOptions)](./vertexai.md#getgenerativemodel_8dbc150) | Returns a [GenerativeModel](./vertexai.generativemodel.md#generativemodel_class) class with methods for inference and other functionality. | | [getImagenModel(vertexAI, modelParams, requestOptions)](./vertexai.md#getimagenmodel_812c375) | (Public Preview) Returns an [ImagenModel](./vertexai.imagenmodel.md#imagenmodel_class) class with methods for using Imagen.Only Imagen 3 models (named imagen-3.0-*) are supported. | ## Classes @@ -101,10 +101,10 @@ The Vertex AI in Firebase Web SDK. | [ImagenSafetySettings](./vertexai.imagensafetysettings.md#imagensafetysettings_interface) | (Public Preview) Settings for controlling the aggressiveness of filtering out sensitive content.See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) for more details. | | [InlineDataPart](./vertexai.inlinedatapart.md#inlinedatapart_interface) | Content part interface if the part represents an image. | | [ModalityTokenCount](./vertexai.modalitytokencount.md#modalitytokencount_interface) | Represents token counting info for a single modality. | -| [ModelParams](./vertexai.modelparams.md#modelparams_interface) | Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_202434f). | +| [ModelParams](./vertexai.modelparams.md#modelparams_interface) | Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_8dbc150). | | [ObjectSchemaInterface](./vertexai.objectschemainterface.md#objectschemainterface_interface) | Interface for [ObjectSchema](./vertexai.objectschema.md#objectschema_class) class. 
| | [PromptFeedback](./vertexai.promptfeedback.md#promptfeedback_interface) | If the prompt was blocked, this will be populated with blockReason and the relevant safetyRatings. | -| [RequestOptions](./vertexai.requestoptions.md#requestoptions_interface) | Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_202434f). | +| [RequestOptions](./vertexai.requestoptions.md#requestoptions_interface) | Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_8dbc150). | | [RetrievedContextAttribution](./vertexai.retrievedcontextattribution.md#retrievedcontextattribution_interface) | | | [SafetyRating](./vertexai.safetyrating.md#safetyrating_interface) | A safety rating associated with a [GenerateContentCandidate](./vertexai.generatecontentcandidate.md#generatecontentcandidate_interface) | | [SafetySetting](./vertexai.safetysetting.md#safetysetting_interface) | Safety setting that can be sent as part of request parameters. | @@ -162,14 +162,14 @@ export declare function getVertexAI(app?: FirebaseApp, options?: VertexAIOptions ## function(vertexAI, ...) -### getGenerativeModel(vertexAI, onCloudOrHybridParams, requestOptions) {:#getgenerativemodel_202434f} +### getGenerativeModel(vertexAI, modelParams, requestOptions) {:#getgenerativemodel_8dbc150} Returns a [GenerativeModel](./vertexai.generativemodel.md#generativemodel_class) class with methods for inference and other functionality. 
Signature: ```typescript -export declare function getGenerativeModel(vertexAI: VertexAI, onCloudOrHybridParams: ModelParams | HybridParams, requestOptions?: RequestOptions): GenerativeModel; +export declare function getGenerativeModel(vertexAI: VertexAI, modelParams: ModelParams | HybridParams, requestOptions?: RequestOptions): GenerativeModel; ``` #### Parameters @@ -177,7 +177,7 @@ export declare function getGenerativeModel(vertexAI: VertexAI, onCloudOrHybridPa | Parameter | Type | Description | | --- | --- | --- | | vertexAI | [VertexAI](./vertexai.vertexai.md#vertexai_interface) | | -| onCloudOrHybridParams | [ModelParams](./vertexai.modelparams.md#modelparams_interface) \| [HybridParams](./vertexai.hybridparams.md#hybridparams_interface) | | +| modelParams | [ModelParams](./vertexai.modelparams.md#modelparams_interface) \| [HybridParams](./vertexai.hybridparams.md#hybridparams_interface) | | | requestOptions | [RequestOptions](./vertexai.requestoptions.md#requestoptions_interface) | | Returns: diff --git a/docs-devsite/vertexai.modelparams.md b/docs-devsite/vertexai.modelparams.md index 6645d498d8e..0776b198cf1 100644 --- a/docs-devsite/vertexai.modelparams.md +++ b/docs-devsite/vertexai.modelparams.md @@ -10,7 +10,7 @@ https://github.com/firebase/firebase-js-sdk {% endcomment %} # ModelParams interface -Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_202434f). +Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_8dbc150). Signature: diff --git a/docs-devsite/vertexai.requestoptions.md b/docs-devsite/vertexai.requestoptions.md index 334ce7956d6..4e1ce2b86e3 100644 --- a/docs-devsite/vertexai.requestoptions.md +++ b/docs-devsite/vertexai.requestoptions.md @@ -10,7 +10,7 @@ https://github.com/firebase/firebase-js-sdk {% endcomment %} # RequestOptions interface -Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_202434f). 
+Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_8dbc150). Signature: diff --git a/packages/vertexai/src/api.test.ts b/packages/vertexai/src/api.test.ts index 4a0b978d858..a38358f806f 100644 --- a/packages/vertexai/src/api.test.ts +++ b/packages/vertexai/src/api.test.ts @@ -14,7 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -import { ImagenModelParams, ModelParams, VertexAIErrorCode } from './types'; +import { + ImagenModelParams, + InferenceMode, + ModelParams, + VertexAIErrorCode +} from './types'; import { VertexAIError } from './errors'; import { ImagenModel, getGenerativeModel, getImagenModel } from './api'; import { expect } from 'chai'; @@ -112,6 +117,13 @@ describe('Top level API', () => { ); } }); + it('getGenerativeModel with HybridParams sets the model', () => { + const genModel = getGenerativeModel(fakeVertexAI, { + mode: InferenceMode.ONLY_ON_CLOUD, + onCloudParams: { model: 'my-model' } + }); + expect(genModel.model).to.equal('publishers/google/models/my-model'); + }); it('getImagenModel throws if no apiKey is provided', () => { const fakeVertexNoApiKey = { ...fakeVertexAI, diff --git a/packages/vertexai/src/api.ts b/packages/vertexai/src/api.ts index 323cfd10e80..7f11dd80844 100644 --- a/packages/vertexai/src/api.ts +++ b/packages/vertexai/src/api.ts @@ -71,18 +71,18 @@ export function getVertexAI( */ export function getGenerativeModel( vertexAI: VertexAI, - onCloudOrHybridParams: ModelParams | HybridParams, + modelParams: ModelParams | HybridParams, requestOptions?: RequestOptions ): GenerativeModel { - // Disambiguates onCloudOrHybridParams input. - const hybridParams = onCloudOrHybridParams as HybridParams; + // Uses the existence of HybridParams.mode to clarify the type of the modelParams input. 
+ const hybridParams = modelParams as HybridParams; let onCloudParams: ModelParams; if (hybridParams.mode) { onCloudParams = hybridParams.onCloudParams || { model: 'gemini-2.0-flash-lite' }; } else { - onCloudParams = onCloudOrHybridParams as ModelParams; + onCloudParams = modelParams as ModelParams; } if (!onCloudParams.model) { From 5a4fb81415e32f66d937455c86325de1a8add8f8 Mon Sep 17 00:00:00 2001 From: gsiddh <92327772+gsiddh@users.noreply.github.com> Date: Wed, 16 Apr 2025 14:14:29 -0700 Subject: [PATCH 04/14] Moving to in-cloud naming (#8938) Co-authored-by: Erik Eldridge --- common/api-review/vertexai.api.md | 14 ++++-------- docs-devsite/vertexai.generativemodel.md | 11 ++++++++++ docs-devsite/vertexai.hybridparams.md | 22 ++++++++++++------- docs-devsite/vertexai.md | 12 +++++----- packages/vertexai/src/api.test.ts | 22 +++++++++++++------ packages/vertexai/src/api.ts | 12 +++++----- .../vertexai/src/models/generative-model.ts | 4 ++++ packages/vertexai/src/types/enums.ts | 16 +++++++++++--- packages/vertexai/src/types/requests.ts | 16 ++++++++++---- 9 files changed, 85 insertions(+), 44 deletions(-) diff --git a/common/api-review/vertexai.api.md b/common/api-review/vertexai.api.md index 5c8ef330cbe..d8c189a0059 100644 --- a/common/api-review/vertexai.api.md +++ b/common/api-review/vertexai.api.md @@ -326,6 +326,7 @@ export interface GenerativeContentBlob { export class GenerativeModel extends VertexAIModel { constructor(vertexAI: VertexAI, modelParams: ModelParams, requestOptions?: RequestOptions); countTokens(request: CountTokensRequest | string | Array): Promise; + static DEFAULT_HYBRID_IN_CLOUD_MODEL: string; generateContent(request: GenerateContentRequest | string | Array): Promise; generateContentStream(request: GenerateContentRequest | string | Array): Promise; // (undocumented) @@ -418,13 +419,9 @@ export enum HarmSeverity { // @public export interface HybridParams { - // (undocumented) - mode?: InferenceMode; - // (undocumented) - onCloudParams?: 
ModelParams; + inCloudParams?: ModelParams; + mode: InferenceMode; // Warning: (ae-forgotten-export) The symbol "LanguageModelCreateOptions" needs to be exported by the entry point index.d.ts - // - // (undocumented) onDeviceParams?: LanguageModelCreateOptions; } @@ -514,11 +511,8 @@ export interface ImagenSafetySettings { // @public export enum InferenceMode { - // (undocumented) - ONLY_ON_CLOUD = "ONLY_ON_CLOUD", - // (undocumented) + ONLY_IN_CLOUD = "ONLY_IN_CLOUD", ONLY_ON_DEVICE = "ONLY_ON_DEVICE", - // (undocumented) PREFER_ON_DEVICE = "PREFER_ON_DEVICE" } diff --git a/docs-devsite/vertexai.generativemodel.md b/docs-devsite/vertexai.generativemodel.md index e4a238b0af5..831ec5d4369 100644 --- a/docs-devsite/vertexai.generativemodel.md +++ b/docs-devsite/vertexai.generativemodel.md @@ -29,6 +29,7 @@ export declare class GenerativeModel extends VertexAIModel | Property | Modifiers | Type | Description | | --- | --- | --- | --- | +| [DEFAULT\_HYBRID\_IN\_CLOUD\_MODEL](./vertexai.generativemodel.md#generativemodeldefault_hybrid_in_cloud_model) | static | string | Defines the name of the default in-cloud model to use for hybrid inference. 
| | [generationConfig](./vertexai.generativemodel.md#generativemodelgenerationconfig) | | [GenerationConfig](./vertexai.generationconfig.md#generationconfig_interface) | | | [requestOptions](./vertexai.generativemodel.md#generativemodelrequestoptions) | | [RequestOptions](./vertexai.requestoptions.md#requestoptions_interface) | | | [safetySettings](./vertexai.generativemodel.md#generativemodelsafetysettings) | | [SafetySetting](./vertexai.safetysetting.md#safetysetting_interface)\[\] | | @@ -63,6 +64,16 @@ constructor(vertexAI: VertexAI, modelParams: ModelParams, requestOptions?: Reque | modelParams | [ModelParams](./vertexai.modelparams.md#modelparams_interface) | | | requestOptions | [RequestOptions](./vertexai.requestoptions.md#requestoptions_interface) | | +## GenerativeModel.DEFAULT\_HYBRID\_IN\_CLOUD\_MODEL + +Defines the name of the default in-cloud model to use for hybrid inference. + +Signature: + +```typescript +static DEFAULT_HYBRID_IN_CLOUD_MODEL: string; +``` + ## GenerativeModel.generationConfig Signature: diff --git a/docs-devsite/vertexai.hybridparams.md b/docs-devsite/vertexai.hybridparams.md index ea175a7234b..cf847b40fa7 100644 --- a/docs-devsite/vertexai.hybridparams.md +++ b/docs-devsite/vertexai.hybridparams.md @@ -10,7 +10,7 @@ https://github.com/firebase/firebase-js-sdk {% endcomment %} # HybridParams interface -Configures on-device and on-cloud inference. +Toggles hybrid inference. 
Signature: @@ -22,28 +22,34 @@ export interface HybridParams | Property | Type | Description | | --- | --- | --- | -| [mode](./vertexai.hybridparams.md#hybridparamsmode) | [InferenceMode](./vertexai.md#inferencemode) | | -| [onCloudParams](./vertexai.hybridparams.md#hybridparamsoncloudparams) | [ModelParams](./vertexai.modelparams.md#modelparams_interface) | | -| [onDeviceParams](./vertexai.hybridparams.md#hybridparamsondeviceparams) | LanguageModelCreateOptions | | +| [inCloudParams](./vertexai.hybridparams.md#hybridparamsincloudparams) | [ModelParams](./vertexai.modelparams.md#modelparams_interface) | Optional. Specifies advanced params for in-cloud inference. | +| [mode](./vertexai.hybridparams.md#hybridparamsmode) | [InferenceMode](./vertexai.md#inferencemode) | Specifies on-device or in-cloud inference. Defaults to prefer on-device. | +| [onDeviceParams](./vertexai.hybridparams.md#hybridparamsondeviceparams) | LanguageModelCreateOptions | Optional. Specifies advanced params for on-device inference. | -## HybridParams.mode +## HybridParams.inCloudParams + +Optional. Specifies advanced params for in-cloud inference. Signature: ```typescript -mode?: InferenceMode; +inCloudParams?: ModelParams; ``` -## HybridParams.onCloudParams +## HybridParams.mode + +Specifies on-device or in-cloud inference. Defaults to prefer on-device. Signature: ```typescript -onCloudParams?: ModelParams; +mode: InferenceMode; ``` ## HybridParams.onDeviceParams +Optional. Specifies advanced params for on-device inference. + Signature: ```typescript diff --git a/docs-devsite/vertexai.md b/docs-devsite/vertexai.md index 305d0f09b61..734a21fa3dc 100644 --- a/docs-devsite/vertexai.md +++ b/docs-devsite/vertexai.md @@ -55,7 +55,7 @@ The Vertex AI in Firebase Web SDK. 
| [ImagenAspectRatio](./vertexai.md#imagenaspectratio) | (Public Preview) Aspect ratios for Imagen images.To specify an aspect ratio for generated images, set the aspectRatio property in your [ImagenGenerationConfig](./vertexai.imagengenerationconfig.md#imagengenerationconfig_interface).See the the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) for more details and examples of the supported aspect ratios. | | [ImagenPersonFilterLevel](./vertexai.md#imagenpersonfilterlevel) | (Public Preview) A filter level controlling whether generation of images containing people or faces is allowed.See the personGeneration documentation for more details. | | [ImagenSafetyFilterLevel](./vertexai.md#imagensafetyfilterlevel) | (Public Preview) A filter level controlling how aggressively to filter sensitive content.Text prompts provided as inputs and images (generated or uploaded) through Imagen on Vertex AI are assessed against a list of safety filters, which include 'harmful categories' (for example, violence, sexual, derogatory, and toxic). This filter level controls how aggressively to filter out potentially harmful content from responses. See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) and the [Responsible AI and usage guidelines](https://cloud.google.com/vertex-ai/generative-ai/docs/image/responsible-ai-imagen#safety-filters) for more details. | -| [InferenceMode](./vertexai.md#inferencemode) | Determines whether inference happens on-device or on-cloud. | +| [InferenceMode](./vertexai.md#inferencemode) | Determines whether inference happens on-device or in-cloud. | | [Modality](./vertexai.md#modality) | Content part modality. 
| | [SchemaType](./vertexai.md#schematype) | Contains the list of OpenAPI data types as defined by the [OpenAPI specification](https://swagger.io/docs/specification/data-models/data-types/) | | [VertexAIErrorCode](./vertexai.md#vertexaierrorcode) | Standardized error codes that [VertexAIError](./vertexai.vertexaierror.md#vertexaierror_class) can have. | @@ -92,7 +92,7 @@ The Vertex AI in Firebase Web SDK. | [GenerativeContentBlob](./vertexai.generativecontentblob.md#generativecontentblob_interface) | Interface for sending an image. | | [GroundingAttribution](./vertexai.groundingattribution.md#groundingattribution_interface) | | | [GroundingMetadata](./vertexai.groundingmetadata.md#groundingmetadata_interface) | Metadata returned to client when grounding is enabled. | -| [HybridParams](./vertexai.hybridparams.md#hybridparams_interface) | Configures on-device and on-cloud inference. | +| [HybridParams](./vertexai.hybridparams.md#hybridparams_interface) | Toggles hybrid inference. | | [ImagenGCSImage](./vertexai.imagengcsimage.md#imagengcsimage_interface) | An image generated by Imagen, stored in a Cloud Storage for Firebase bucket.This feature is not available yet. | | [ImagenGenerationConfig](./vertexai.imagengenerationconfig.md#imagengenerationconfig_interface) | (Public Preview) Configuration options for generating images with Imagen.See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images-imagen) for more details. | | [ImagenGenerationResponse](./vertexai.imagengenerationresponse.md#imagengenerationresponse_interface) | (Public Preview) The response from a request to generate images with Imagen. | @@ -493,7 +493,7 @@ export declare enum ImagenSafetyFilterLevel ## InferenceMode -Determines whether inference happens on-device or on-cloud. +Determines whether inference happens on-device or in-cloud. 
Signature: @@ -505,9 +505,9 @@ export declare enum InferenceMode | Member | Value | Description | | --- | --- | --- | -| ONLY\_ON\_CLOUD | "ONLY_ON_CLOUD" | | -| ONLY\_ON\_DEVICE | "ONLY_ON_DEVICE" | | -| PREFER\_ON\_DEVICE | "PREFER_ON_DEVICE" | | +| ONLY\_IN\_CLOUD | "ONLY_IN_CLOUD" | Exclusively uses the in-cloud model. | +| ONLY\_ON\_DEVICE | "ONLY_ON_DEVICE" | Exclusively uses the on-device model. Throws if one is not available. | +| PREFER\_ON\_DEVICE | "PREFER_ON_DEVICE" | Uses the on-device model if available, or falls back to the in-cloud model. | ## Modality diff --git a/packages/vertexai/src/api.test.ts b/packages/vertexai/src/api.test.ts index a38358f806f..aeb090e24c5 100644 --- a/packages/vertexai/src/api.test.ts +++ b/packages/vertexai/src/api.test.ts @@ -106,6 +106,21 @@ describe('Top level API', () => { expect(genModel).to.be.an.instanceOf(GenerativeModel); expect(genModel.model).to.equal('publishers/google/models/my-model'); }); + it('getGenerativeModel with HybridParams sets a default model', () => { + const genModel = getGenerativeModel(fakeVertexAI, { + mode: InferenceMode.ONLY_ON_DEVICE + }); + expect(genModel.model).to.equal( + `publishers/google/models/${GenerativeModel.DEFAULT_HYBRID_IN_CLOUD_MODEL}` + ); + }); + it('getGenerativeModel with HybridParams honors a model override', () => { + const genModel = getGenerativeModel(fakeVertexAI, { + mode: InferenceMode.ONLY_IN_CLOUD, + inCloudParams: { model: 'my-model' } + }); + expect(genModel.model).to.equal('publishers/google/models/my-model'); + }); it('getImagenModel throws if no model is provided', () => { try { getImagenModel(fakeVertexAI, {} as ImagenModelParams); @@ -117,13 +132,6 @@ describe('Top level API', () => { ); } }); - it('getGenerativeModel with HybridParams sets the model', () => { - const genModel = getGenerativeModel(fakeVertexAI, { - mode: InferenceMode.ONLY_ON_CLOUD, - onCloudParams: { model: 'my-model' } - }); - 
expect(genModel.model).to.equal('publishers/google/models/my-model'); - }); it('getImagenModel throws if no apiKey is provided', () => { const fakeVertexNoApiKey = { ...fakeVertexAI, diff --git a/packages/vertexai/src/api.ts b/packages/vertexai/src/api.ts index 7f11dd80844..2b8536dd89f 100644 --- a/packages/vertexai/src/api.ts +++ b/packages/vertexai/src/api.ts @@ -76,22 +76,22 @@ export function getGenerativeModel( ): GenerativeModel { // Uses the existence of HybridParams.mode to clarify the type of the modelParams input. const hybridParams = modelParams as HybridParams; - let onCloudParams: ModelParams; + let inCloudParams: ModelParams; if (hybridParams.mode) { - onCloudParams = hybridParams.onCloudParams || { - model: 'gemini-2.0-flash-lite' + inCloudParams = hybridParams.inCloudParams || { + model: GenerativeModel.DEFAULT_HYBRID_IN_CLOUD_MODEL }; } else { - onCloudParams = modelParams as ModelParams; + inCloudParams = modelParams as ModelParams; } - if (!onCloudParams.model) { + if (!inCloudParams.model) { throw new VertexAIError( VertexAIErrorCode.NO_MODEL, `Must provide a model name. Example: getGenerativeModel({ model: 'my-model-name' })` ); } - return new GenerativeModel(vertexAI, onCloudParams, requestOptions); + return new GenerativeModel(vertexAI, inCloudParams, requestOptions); } /** diff --git a/packages/vertexai/src/models/generative-model.ts b/packages/vertexai/src/models/generative-model.ts index 983118bf6ff..32406ea19f7 100644 --- a/packages/vertexai/src/models/generative-model.ts +++ b/packages/vertexai/src/models/generative-model.ts @@ -49,6 +49,10 @@ import { VertexAIModel } from './vertexai-model'; * @public */ export class GenerativeModel extends VertexAIModel { + /** + * Defines the name of the default in-cloud model to use for hybrid inference. 
+ */ + static DEFAULT_HYBRID_IN_CLOUD_MODEL = 'gemini-2.0-flash-lite'; generationConfig: GenerationConfig; safetySettings: SafetySetting[]; requestOptions?: RequestOptions; diff --git a/packages/vertexai/src/types/enums.ts b/packages/vertexai/src/types/enums.ts index 1f81ed79a8f..57629e5c9f5 100644 --- a/packages/vertexai/src/types/enums.ts +++ b/packages/vertexai/src/types/enums.ts @@ -242,11 +242,21 @@ export enum Modality { } /** - * Determines whether inference happens on-device or on-cloud. - * @public + * Determines whether inference happens on-device or in-cloud. */ export enum InferenceMode { + /** + * Uses the on-device model if available, or falls back to the in-cloud model. + */ PREFER_ON_DEVICE = 'PREFER_ON_DEVICE', + + /** + * Exclusively uses the on-device model. Throws if one is not available. + */ ONLY_ON_DEVICE = 'ONLY_ON_DEVICE', - ONLY_ON_CLOUD = 'ONLY_ON_CLOUD' + + /** + * Exclusively uses the in-cloud model. + */ + ONLY_IN_CLOUD = 'ONLY_IN_CLOUD' } diff --git a/packages/vertexai/src/types/requests.ts b/packages/vertexai/src/types/requests.ts index c643fcd6084..345f98ca163 100644 --- a/packages/vertexai/src/types/requests.ts +++ b/packages/vertexai/src/types/requests.ts @@ -217,11 +217,19 @@ export interface FunctionCallingConfig { } /** - * Configures on-device and on-cloud inference. - * @public + * Toggles hybrid inference. */ export interface HybridParams { - mode?: InferenceMode; + /** + * Specifies on-device or in-cloud inference. Defaults to prefer on-device. + */ + mode: InferenceMode; + /** + * Optional. Specifies advanced params for on-device inference. + */ onDeviceParams?: LanguageModelCreateOptions; - onCloudParams?: ModelParams; + /** + * Optional. Specifies advanced params for in-cloud inference. 
+ */ + inCloudParams?: ModelParams; } From eeeab4e83ebc6c8d4ea09bf0de1bf0b77fe9c58f Mon Sep 17 00:00:00 2001 From: gsiddh <92327772+gsiddh@users.noreply.github.com> Date: Wed, 16 Apr 2025 15:29:42 -0700 Subject: [PATCH 05/14] Moving to string type for the inference mode (#8941) --- common/api-review/vertexai.api.md | 6 +---- docs-devsite/vertexai.md | 30 +++++++++---------------- packages/vertexai/src/api.test.ts | 11 +++------ packages/vertexai/src/types/enums.ts | 20 ----------------- packages/vertexai/src/types/requests.ts | 11 +++++++-- 5 files changed, 24 insertions(+), 54 deletions(-) diff --git a/common/api-review/vertexai.api.md b/common/api-review/vertexai.api.md index d8c189a0059..2286824d45c 100644 --- a/common/api-review/vertexai.api.md +++ b/common/api-review/vertexai.api.md @@ -510,11 +510,7 @@ export interface ImagenSafetySettings { } // @public -export enum InferenceMode { - ONLY_IN_CLOUD = "ONLY_IN_CLOUD", - ONLY_ON_DEVICE = "ONLY_ON_DEVICE", - PREFER_ON_DEVICE = "PREFER_ON_DEVICE" -} +export type InferenceMode = 'prefer_on_device' | 'only_on_device' | 'only_in_cloud'; // @public export interface InlineDataPart { diff --git a/docs-devsite/vertexai.md b/docs-devsite/vertexai.md index 734a21fa3dc..3ad906c6e47 100644 --- a/docs-devsite/vertexai.md +++ b/docs-devsite/vertexai.md @@ -55,7 +55,6 @@ The Vertex AI in Firebase Web SDK. | [ImagenAspectRatio](./vertexai.md#imagenaspectratio) | (Public Preview) Aspect ratios for Imagen images.To specify an aspect ratio for generated images, set the aspectRatio property in your [ImagenGenerationConfig](./vertexai.imagengenerationconfig.md#imagengenerationconfig_interface).See the the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) for more details and examples of the supported aspect ratios. 
| | [ImagenPersonFilterLevel](./vertexai.md#imagenpersonfilterlevel) | (Public Preview) A filter level controlling whether generation of images containing people or faces is allowed.See the personGeneration documentation for more details. | | [ImagenSafetyFilterLevel](./vertexai.md#imagensafetyfilterlevel) | (Public Preview) A filter level controlling how aggressively to filter sensitive content.Text prompts provided as inputs and images (generated or uploaded) through Imagen on Vertex AI are assessed against a list of safety filters, which include 'harmful categories' (for example, violence, sexual, derogatory, and toxic). This filter level controls how aggressively to filter out potentially harmful content from responses. See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) and the [Responsible AI and usage guidelines](https://cloud.google.com/vertex-ai/generative-ai/docs/image/responsible-ai-imagen#safety-filters) for more details. | -| [InferenceMode](./vertexai.md#inferencemode) | Determines whether inference happens on-device or in-cloud. | | [Modality](./vertexai.md#modality) | Content part modality. | | [SchemaType](./vertexai.md#schematype) | Contains the list of OpenAPI data types as defined by the [OpenAPI specification](https://swagger.io/docs/specification/data-models/data-types/) | | [VertexAIErrorCode](./vertexai.md#vertexaierrorcode) | Standardized error codes that [VertexAIError](./vertexai.vertexaierror.md#vertexaierror_class) can have. | @@ -132,6 +131,7 @@ The Vertex AI in Firebase Web SDK. | Type Alias | Description | | --- | --- | +| [InferenceMode](./vertexai.md#inferencemode) | Determines whether inference happens on-device or in-cloud. | | [Part](./vertexai.md#part) | Content part - includes text, image/video, or function call/response part types. | | [Role](./vertexai.md#role) | Role is the producer of the content. 
| | [Tool](./vertexai.md#tool) | Defines a tool that model can call to access external knowledge. | @@ -225,6 +225,16 @@ Possible roles. POSSIBLE_ROLES: readonly ["user", "model", "function", "system"] ``` +## InferenceMode + +Determines whether inference happens on-device or in-cloud. + +Signature: + +```typescript +export type InferenceMode = 'prefer_on_device' | 'only_on_device' | 'only_in_cloud'; +``` + ## Part Content part - includes text, image/video, or function call/response part types. @@ -491,24 +501,6 @@ export declare enum ImagenSafetyFilterLevel | BLOCK\_NONE | "block_none" | (Public Preview) The least aggressive filtering level; blocks very few sensitive prompts and responses.Access to this feature is restricted and may require your case to be reviewed and approved by Cloud support. | | BLOCK\_ONLY\_HIGH | "block_only_high" | (Public Preview) Blocks few sensitive prompts and responses. | -## InferenceMode - -Determines whether inference happens on-device or in-cloud. - -Signature: - -```typescript -export declare enum InferenceMode -``` - -## Enumeration Members - -| Member | Value | Description | -| --- | --- | --- | -| ONLY\_IN\_CLOUD | "ONLY_IN_CLOUD" | Exclusively uses the in-cloud model. | -| ONLY\_ON\_DEVICE | "ONLY_ON_DEVICE" | Exclusively uses the on-device model. Throws if one is not available. | -| PREFER\_ON\_DEVICE | "PREFER_ON_DEVICE" | Uses the on-device model if available, or falls back to the in-cloud model. | - ## Modality Content part modality. diff --git a/packages/vertexai/src/api.test.ts b/packages/vertexai/src/api.test.ts index aeb090e24c5..7b25dbdf9e9 100644 --- a/packages/vertexai/src/api.test.ts +++ b/packages/vertexai/src/api.test.ts @@ -14,12 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -import { - ImagenModelParams, - InferenceMode, - ModelParams, - VertexAIErrorCode -} from './types'; +import { ImagenModelParams, ModelParams, VertexAIErrorCode } from './types'; import { VertexAIError } from './errors'; import { ImagenModel, getGenerativeModel, getImagenModel } from './api'; import { expect } from 'chai'; @@ -108,7 +103,7 @@ describe('Top level API', () => { }); it('getGenerativeModel with HybridParams sets a default model', () => { const genModel = getGenerativeModel(fakeVertexAI, { - mode: InferenceMode.ONLY_ON_DEVICE + mode: 'only_on_device' }); expect(genModel.model).to.equal( `publishers/google/models/${GenerativeModel.DEFAULT_HYBRID_IN_CLOUD_MODEL}` @@ -116,7 +111,7 @@ describe('Top level API', () => { }); it('getGenerativeModel with HybridParams honors a model override', () => { const genModel = getGenerativeModel(fakeVertexAI, { - mode: InferenceMode.ONLY_IN_CLOUD, + mode: 'prefer_on_device', inCloudParams: { model: 'my-model' } }); expect(genModel.model).to.equal('publishers/google/models/my-model'); diff --git a/packages/vertexai/src/types/enums.ts b/packages/vertexai/src/types/enums.ts index 57629e5c9f5..a9481d40f5f 100644 --- a/packages/vertexai/src/types/enums.ts +++ b/packages/vertexai/src/types/enums.ts @@ -240,23 +240,3 @@ export enum Modality { */ DOCUMENT = 'DOCUMENT' } - -/** - * Determines whether inference happens on-device or in-cloud. - */ -export enum InferenceMode { - /** - * Uses the on-device model if available, or falls back to the in-cloud model. - */ - PREFER_ON_DEVICE = 'PREFER_ON_DEVICE', - - /** - * Exclusively uses the on-device model. Throws if one is not available. - */ - ONLY_ON_DEVICE = 'ONLY_ON_DEVICE', - - /** - * Exclusively uses the in-cloud model. 
- */ - ONLY_IN_CLOUD = 'ONLY_IN_CLOUD' -} diff --git a/packages/vertexai/src/types/requests.ts b/packages/vertexai/src/types/requests.ts index 345f98ca163..35a3c428d73 100644 --- a/packages/vertexai/src/types/requests.ts +++ b/packages/vertexai/src/types/requests.ts @@ -22,8 +22,7 @@ import { FunctionCallingMode, HarmBlockMethod, HarmBlockThreshold, - HarmCategory, - InferenceMode + HarmCategory } from './enums'; import { ObjectSchemaInterface, SchemaRequest } from './schema'; @@ -233,3 +232,11 @@ export interface HybridParams { */ inCloudParams?: ModelParams; } + +/** + * Determines whether inference happens on-device or in-cloud. + */ +export type InferenceMode = + | 'prefer_on_device' + | 'only_on_device' + | 'only_in_cloud'; From b7a3d24f3dae63a4a4fe6f9b65c46ab8573caaca Mon Sep 17 00:00:00 2001 From: gsiddh <92327772+gsiddh@users.noreply.github.com> Date: Wed, 16 Apr 2025 16:52:32 -0700 Subject: [PATCH 06/14] Define ChromeAdapter class (#8942) Co-authored-by: Erik Eldridge --- common/api-review/vertexai.api.md | 5 +- docs-devsite/vertexai.chatsession.md | 5 +- docs-devsite/vertexai.generativemodel.md | 5 +- packages/vertexai/src/api.ts | 8 +- .../vertexai/src/methods/chat-session.test.ts | 19 ++- packages/vertexai/src/methods/chat-session.ts | 3 + .../vertexai/src/methods/chrome-adapter.ts | 51 ++++++ .../src/methods/generate-content.test.ts | 60 +++++-- .../vertexai/src/methods/generate-content.ts | 27 +++- .../src/models/generative-model.test.ts | 147 +++++++++++------- .../vertexai/src/models/generative-model.ts | 4 + 11 files changed, 256 insertions(+), 78 deletions(-) create mode 100644 packages/vertexai/src/methods/chrome-adapter.ts diff --git a/common/api-review/vertexai.api.md b/common/api-review/vertexai.api.md index 2286824d45c..f03e7ed14ed 100644 --- a/common/api-review/vertexai.api.md +++ b/common/api-review/vertexai.api.md @@ -42,7 +42,8 @@ export class BooleanSchema extends Schema { // @public export class ChatSession { // Warning: 
(ae-forgotten-export) The symbol "ApiSettings" needs to be exported by the entry point index.d.ts - constructor(apiSettings: ApiSettings, model: string, params?: StartChatParams | undefined, requestOptions?: RequestOptions | undefined); + // Warning: (ae-forgotten-export) The symbol "ChromeAdapter" needs to be exported by the entry point index.d.ts + constructor(apiSettings: ApiSettings, model: string, chromeAdapter: ChromeAdapter, params?: StartChatParams | undefined, requestOptions?: RequestOptions | undefined); getHistory(): Promise; // (undocumented) model: string; @@ -324,7 +325,7 @@ export interface GenerativeContentBlob { // @public export class GenerativeModel extends VertexAIModel { - constructor(vertexAI: VertexAI, modelParams: ModelParams, requestOptions?: RequestOptions); + constructor(vertexAI: VertexAI, modelParams: ModelParams, chromeAdapter: ChromeAdapter, requestOptions?: RequestOptions); countTokens(request: CountTokensRequest | string | Array): Promise; static DEFAULT_HYBRID_IN_CLOUD_MODEL: string; generateContent(request: GenerateContentRequest | string | Array): Promise; diff --git a/docs-devsite/vertexai.chatsession.md b/docs-devsite/vertexai.chatsession.md index ed359f7e08c..c4a06206bfd 100644 --- a/docs-devsite/vertexai.chatsession.md +++ b/docs-devsite/vertexai.chatsession.md @@ -22,7 +22,7 @@ export declare class ChatSession | Constructor | Modifiers | Description | | --- | --- | --- | -| [(constructor)(apiSettings, model, params, requestOptions)](./vertexai.chatsession.md#chatsessionconstructor) | | Constructs a new instance of the ChatSession class | +| [(constructor)(apiSettings, model, chromeAdapter, params, requestOptions)](./vertexai.chatsession.md#chatsessionconstructor) | | Constructs a new instance of the ChatSession class | ## Properties @@ -47,7 +47,7 @@ Constructs a new instance of the `ChatSession` class Signature: ```typescript -constructor(apiSettings: ApiSettings, model: string, params?: StartChatParams | undefined, 
requestOptions?: RequestOptions | undefined); +constructor(apiSettings: ApiSettings, model: string, chromeAdapter: ChromeAdapter, params?: StartChatParams | undefined, requestOptions?: RequestOptions | undefined); ``` #### Parameters @@ -56,6 +56,7 @@ constructor(apiSettings: ApiSettings, model: string, params?: StartChatParams | | --- | --- | --- | | apiSettings | ApiSettings | | | model | string | | +| chromeAdapter | ChromeAdapter | | | params | [StartChatParams](./vertexai.startchatparams.md#startchatparams_interface) \| undefined | | | requestOptions | [RequestOptions](./vertexai.requestoptions.md#requestoptions_interface) \| undefined | | diff --git a/docs-devsite/vertexai.generativemodel.md b/docs-devsite/vertexai.generativemodel.md index 831ec5d4369..978bacc612f 100644 --- a/docs-devsite/vertexai.generativemodel.md +++ b/docs-devsite/vertexai.generativemodel.md @@ -23,7 +23,7 @@ export declare class GenerativeModel extends VertexAIModel | Constructor | Modifiers | Description | | --- | --- | --- | -| [(constructor)(vertexAI, modelParams, requestOptions)](./vertexai.generativemodel.md#generativemodelconstructor) | | Constructs a new instance of the GenerativeModel class | +| [(constructor)(vertexAI, modelParams, chromeAdapter, requestOptions)](./vertexai.generativemodel.md#generativemodelconstructor) | | Constructs a new instance of the GenerativeModel class | ## Properties @@ -53,7 +53,7 @@ Constructs a new instance of the `GenerativeModel` class Signature: ```typescript -constructor(vertexAI: VertexAI, modelParams: ModelParams, requestOptions?: RequestOptions); +constructor(vertexAI: VertexAI, modelParams: ModelParams, chromeAdapter: ChromeAdapter, requestOptions?: RequestOptions); ``` #### Parameters @@ -62,6 +62,7 @@ constructor(vertexAI: VertexAI, modelParams: ModelParams, requestOptions?: Reque | --- | --- | --- | | vertexAI | [VertexAI](./vertexai.vertexai.md#vertexai_interface) | | | modelParams | 
[ModelParams](./vertexai.modelparams.md#modelparams_interface) | | +| chromeAdapter | ChromeAdapter | | | requestOptions | [RequestOptions](./vertexai.requestoptions.md#requestoptions_interface) | | ## GenerativeModel.DEFAULT\_HYBRID\_IN\_CLOUD\_MODEL diff --git a/packages/vertexai/src/api.ts b/packages/vertexai/src/api.ts index 2b8536dd89f..236ca73ce87 100644 --- a/packages/vertexai/src/api.ts +++ b/packages/vertexai/src/api.ts @@ -30,6 +30,7 @@ import { } from './types'; import { VertexAIError } from './errors'; import { VertexAIModel, GenerativeModel, ImagenModel } from './models'; +import { ChromeAdapter } from './methods/chrome-adapter'; export { ChatSession } from './methods/chat-session'; export * from './requests/schema-builder'; @@ -91,7 +92,12 @@ export function getGenerativeModel( `Must provide a model name. Example: getGenerativeModel({ model: 'my-model-name' })` ); } - return new GenerativeModel(vertexAI, inCloudParams, requestOptions); + return new GenerativeModel( + vertexAI, + inCloudParams, + new ChromeAdapter(hybridParams.mode, hybridParams.onDeviceParams), + requestOptions + ); } /** diff --git a/packages/vertexai/src/methods/chat-session.test.ts b/packages/vertexai/src/methods/chat-session.test.ts index bd389a3d778..64f77f740f0 100644 --- a/packages/vertexai/src/methods/chat-session.test.ts +++ b/packages/vertexai/src/methods/chat-session.test.ts @@ -23,6 +23,7 @@ import * as generateContentMethods from './generate-content'; import { GenerateContentStreamResult } from '../types'; import { ChatSession } from './chat-session'; import { ApiSettings } from '../types/internal'; +import { ChromeAdapter } from './chrome-adapter'; use(sinonChai); use(chaiAsPromised); @@ -44,7 +45,11 @@ describe('ChatSession', () => { generateContentMethods, 'generateContent' ).rejects('generateContent failed'); - const chatSession = new ChatSession(fakeApiSettings, 'a-model'); + const chatSession = new ChatSession( + fakeApiSettings, + 'a-model', + new ChromeAdapter() + 
); await expect(chatSession.sendMessage('hello')).to.be.rejected; expect(generateContentStub).to.be.calledWith( fakeApiSettings, @@ -61,7 +66,11 @@ describe('ChatSession', () => { generateContentMethods, 'generateContentStream' ).rejects('generateContentStream failed'); - const chatSession = new ChatSession(fakeApiSettings, 'a-model'); + const chatSession = new ChatSession( + fakeApiSettings, + 'a-model', + new ChromeAdapter() + ); await expect(chatSession.sendMessageStream('hello')).to.be.rejected; expect(generateContentStreamStub).to.be.calledWith( fakeApiSettings, @@ -80,7 +89,11 @@ describe('ChatSession', () => { generateContentMethods, 'generateContentStream' ).resolves({} as unknown as GenerateContentStreamResult); - const chatSession = new ChatSession(fakeApiSettings, 'a-model'); + const chatSession = new ChatSession( + fakeApiSettings, + 'a-model', + new ChromeAdapter() + ); await chatSession.sendMessageStream('hello'); expect(generateContentStreamStub).to.be.calledWith( fakeApiSettings, diff --git a/packages/vertexai/src/methods/chat-session.ts b/packages/vertexai/src/methods/chat-session.ts index 60794001e37..4188872cff7 100644 --- a/packages/vertexai/src/methods/chat-session.ts +++ b/packages/vertexai/src/methods/chat-session.ts @@ -30,6 +30,7 @@ import { validateChatHistory } from './chat-session-helpers'; import { generateContent, generateContentStream } from './generate-content'; import { ApiSettings } from '../types/internal'; import { logger } from '../logger'; +import { ChromeAdapter } from './chrome-adapter'; /** * Do not log a message for this error. 
@@ -50,6 +51,7 @@ export class ChatSession { constructor( apiSettings: ApiSettings, public model: string, + private chromeAdapter: ChromeAdapter, public params?: StartChatParams, public requestOptions?: RequestOptions ) { @@ -95,6 +97,7 @@ export class ChatSession { this._apiSettings, this.model, generateContentRequest, + this.chromeAdapter, this.requestOptions ) ) diff --git a/packages/vertexai/src/methods/chrome-adapter.ts b/packages/vertexai/src/methods/chrome-adapter.ts new file mode 100644 index 00000000000..26ecd55c2da --- /dev/null +++ b/packages/vertexai/src/methods/chrome-adapter.ts @@ -0,0 +1,51 @@ +/** + * @license + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { GenerateContentRequest, InferenceMode } from '../types'; +import { LanguageModelCreateOptions } from '../types/language-model'; + +/** + * Defines an inference "backend" that uses Chrome's on-device model, + * and encapsulates logic for detecting when on-device is possible. 
+ */ +export class ChromeAdapter { + constructor( + private mode?: InferenceMode, + private onDeviceParams?: LanguageModelCreateOptions + ) {} + // eslint-disable-next-line @typescript-eslint/no-unused-vars + async isAvailable(request: GenerateContentRequest): Promise { + return false; + } + async generateContentOnDevice( + // eslint-disable-next-line @typescript-eslint/no-unused-vars + request: GenerateContentRequest + ): Promise { + return { + json: () => + Promise.resolve({ + candidates: [ + { + content: { + parts: [{ text: '' }] + } + } + ] + }) + } as Response; + } +} diff --git a/packages/vertexai/src/methods/generate-content.test.ts b/packages/vertexai/src/methods/generate-content.test.ts index 1d15632f828..f714ec4d535 100644 --- a/packages/vertexai/src/methods/generate-content.test.ts +++ b/packages/vertexai/src/methods/generate-content.test.ts @@ -30,6 +30,7 @@ import { } from '../types'; import { ApiSettings } from '../types/internal'; import { Task } from '../requests/request'; +import { ChromeAdapter } from './chrome-adapter'; use(sinonChai); use(chaiAsPromised); @@ -70,7 +71,8 @@ describe('generateContent()', () => { const result = await generateContent( fakeApiSettings, 'model', - fakeRequestParams + fakeRequestParams, + new ChromeAdapter() ); expect(result.response.text()).to.include('Mountain View, California'); expect(makeRequestStub).to.be.calledWith( @@ -95,7 +97,8 @@ describe('generateContent()', () => { const result = await generateContent( fakeApiSettings, 'model', - fakeRequestParams + fakeRequestParams, + new ChromeAdapter() ); expect(result.response.text()).to.include('Use Freshly Ground Coffee'); expect(result.response.text()).to.include('30 minutes of brewing'); @@ -118,7 +121,8 @@ describe('generateContent()', () => { const result = await generateContent( fakeApiSettings, 'model', - fakeRequestParams + fakeRequestParams, + new ChromeAdapter() ); expect(result.response.usageMetadata?.totalTokenCount).to.equal(1913); 
expect(result.response.usageMetadata?.candidatesTokenCount).to.equal(76); @@ -153,7 +157,8 @@ describe('generateContent()', () => { const result = await generateContent( fakeApiSettings, 'model', - fakeRequestParams + fakeRequestParams, + new ChromeAdapter() ); expect(result.response.text()).to.include( 'Some information cited from an external source' @@ -180,7 +185,8 @@ describe('generateContent()', () => { const result = await generateContent( fakeApiSettings, 'model', - fakeRequestParams + fakeRequestParams, + new ChromeAdapter() ); expect(result.response.text).to.throw('SAFETY'); expect(makeRequestStub).to.be.calledWith( @@ -202,7 +208,8 @@ describe('generateContent()', () => { const result = await generateContent( fakeApiSettings, 'model', - fakeRequestParams + fakeRequestParams, + new ChromeAdapter() ); expect(result.response.text).to.throw('SAFETY'); expect(makeRequestStub).to.be.calledWith( @@ -224,7 +231,8 @@ describe('generateContent()', () => { const result = await generateContent( fakeApiSettings, 'model', - fakeRequestParams + fakeRequestParams, + new ChromeAdapter() ); expect(result.response.text()).to.equal(''); expect(makeRequestStub).to.be.calledWith( @@ -246,7 +254,8 @@ describe('generateContent()', () => { const result = await generateContent( fakeApiSettings, 'model', - fakeRequestParams + fakeRequestParams, + new ChromeAdapter() ); expect(result.response.text()).to.include('Some text'); expect(makeRequestStub).to.be.calledWith( @@ -268,7 +277,12 @@ describe('generateContent()', () => { json: mockResponse.json } as Response); await expect( - generateContent(fakeApiSettings, 'model', fakeRequestParams) + generateContent( + fakeApiSettings, + 'model', + fakeRequestParams, + new ChromeAdapter() + ) ).to.be.rejectedWith(/400.*invalid argument/); expect(mockFetch).to.be.called; }); @@ -283,10 +297,36 @@ describe('generateContent()', () => { json: mockResponse.json } as Response); await expect( - generateContent(fakeApiSettings, 'model', 
fakeRequestParams) + generateContent( + fakeApiSettings, + 'model', + fakeRequestParams, + new ChromeAdapter() + ) ).to.be.rejectedWith( /firebasevertexai\.googleapis[\s\S]*my-project[\s\S]*api-not-enabled/ ); expect(mockFetch).to.be.called; }); + it('on-device', async () => { + const chromeAdapter = new ChromeAdapter(); + const isAvailableStub = stub(chromeAdapter, 'isAvailable').resolves(true); + const mockResponse = getMockResponse( + 'vertexAI', + 'unary-success-basic-reply-short.json' + ); + const generateContentStub = stub( + chromeAdapter, + 'generateContentOnDevice' + ).resolves(mockResponse as Response); + const result = await generateContent( + fakeApiSettings, + 'model', + fakeRequestParams, + chromeAdapter + ); + expect(result.response.text()).to.include('Mountain View, California'); + expect(isAvailableStub).to.be.called; + expect(generateContentStub).to.be.calledWith(fakeRequestParams); + }); }); diff --git a/packages/vertexai/src/methods/generate-content.ts b/packages/vertexai/src/methods/generate-content.ts index 0944b38016a..ba7a162aa9c 100644 --- a/packages/vertexai/src/methods/generate-content.ts +++ b/packages/vertexai/src/methods/generate-content.ts @@ -26,6 +26,7 @@ import { Task, makeRequest } from '../requests/request'; import { createEnhancedContentResponse } from '../requests/response-helpers'; import { processStream } from '../requests/stream-reader'; import { ApiSettings } from '../types/internal'; +import { ChromeAdapter } from './chrome-adapter'; export async function generateContentStream( apiSettings: ApiSettings, @@ -44,13 +45,13 @@ export async function generateContentStream( return processStream(response); } -export async function generateContent( +async function generateContentOnCloud( apiSettings: ApiSettings, model: string, params: GenerateContentRequest, requestOptions?: RequestOptions -): Promise { - const response = await makeRequest( +): Promise { + return makeRequest( model, Task.GENERATE_CONTENT, apiSettings, @@ -58,6 
+59,26 @@ export async function generateContent( JSON.stringify(params), requestOptions ); +} + +export async function generateContent( + apiSettings: ApiSettings, + model: string, + params: GenerateContentRequest, + chromeAdapter: ChromeAdapter, + requestOptions?: RequestOptions +): Promise { + let response; + if (await chromeAdapter.isAvailable(params)) { + response = await chromeAdapter.generateContentOnDevice(params); + } else { + response = await generateContentOnCloud( + apiSettings, + model, + params, + requestOptions + ); + } const responseJson: GenerateContentResponse = await response.json(); const enhancedResponse = createEnhancedContentResponse(responseJson); return { diff --git a/packages/vertexai/src/models/generative-model.test.ts b/packages/vertexai/src/models/generative-model.test.ts index 987f9b115e2..7fcae843347 100644 --- a/packages/vertexai/src/models/generative-model.test.ts +++ b/packages/vertexai/src/models/generative-model.test.ts @@ -21,6 +21,7 @@ import * as request from '../requests/request'; import { match, restore, stub } from 'sinon'; import { getMockResponse } from '../../test-utils/mock-response'; import sinonChai from 'sinon-chai'; +import { ChromeAdapter } from '../methods/chrome-adapter'; use(sinonChai); @@ -39,21 +40,27 @@ const fakeVertexAI: VertexAI = { describe('GenerativeModel', () => { it('passes params through to generateContent', async () => { - const genModel = new GenerativeModel(fakeVertexAI, { - model: 'my-model', - tools: [ - { - functionDeclarations: [ - { - name: 'myfunc', - description: 'mydesc' - } - ] - } - ], - toolConfig: { functionCallingConfig: { mode: FunctionCallingMode.NONE } }, - systemInstruction: { role: 'system', parts: [{ text: 'be friendly' }] } - }); + const genModel = new GenerativeModel( + fakeVertexAI, + { + model: 'my-model', + tools: [ + { + functionDeclarations: [ + { + name: 'myfunc', + description: 'mydesc' + } + ] + } + ], + toolConfig: { + functionCallingConfig: { mode: 
FunctionCallingMode.NONE } + }, + systemInstruction: { role: 'system', parts: [{ text: 'be friendly' }] } + }, + new ChromeAdapter() + ); expect(genModel.tools?.length).to.equal(1); expect(genModel.toolConfig?.functionCallingConfig?.mode).to.equal( FunctionCallingMode.NONE @@ -84,10 +91,14 @@ describe('GenerativeModel', () => { restore(); }); it('passes text-only systemInstruction through to generateContent', async () => { - const genModel = new GenerativeModel(fakeVertexAI, { - model: 'my-model', - systemInstruction: 'be friendly' - }); + const genModel = new GenerativeModel( + fakeVertexAI, + { + model: 'my-model', + systemInstruction: 'be friendly' + }, + new ChromeAdapter() + ); expect(genModel.systemInstruction?.parts[0].text).to.equal('be friendly'); const mockResponse = getMockResponse( 'vertexAI', @@ -110,21 +121,27 @@ describe('GenerativeModel', () => { restore(); }); it('generateContent overrides model values', async () => { - const genModel = new GenerativeModel(fakeVertexAI, { - model: 'my-model', - tools: [ - { - functionDeclarations: [ - { - name: 'myfunc', - description: 'mydesc' - } - ] - } - ], - toolConfig: { functionCallingConfig: { mode: FunctionCallingMode.NONE } }, - systemInstruction: { role: 'system', parts: [{ text: 'be friendly' }] } - }); + const genModel = new GenerativeModel( + fakeVertexAI, + { + model: 'my-model', + tools: [ + { + functionDeclarations: [ + { + name: 'myfunc', + description: 'mydesc' + } + ] + } + ], + toolConfig: { + functionCallingConfig: { mode: FunctionCallingMode.NONE } + }, + systemInstruction: { role: 'system', parts: [{ text: 'be friendly' }] } + }, + new ChromeAdapter() + ); expect(genModel.tools?.length).to.equal(1); expect(genModel.toolConfig?.functionCallingConfig?.mode).to.equal( FunctionCallingMode.NONE @@ -166,14 +183,20 @@ describe('GenerativeModel', () => { restore(); }); it('passes params through to chat.sendMessage', async () => { - const genModel = new GenerativeModel(fakeVertexAI, { - model: 
'my-model', - tools: [ - { functionDeclarations: [{ name: 'myfunc', description: 'mydesc' }] } - ], - toolConfig: { functionCallingConfig: { mode: FunctionCallingMode.NONE } }, - systemInstruction: { role: 'system', parts: [{ text: 'be friendly' }] } - }); + const genModel = new GenerativeModel( + fakeVertexAI, + { + model: 'my-model', + tools: [ + { functionDeclarations: [{ name: 'myfunc', description: 'mydesc' }] } + ], + toolConfig: { + functionCallingConfig: { mode: FunctionCallingMode.NONE } + }, + systemInstruction: { role: 'system', parts: [{ text: 'be friendly' }] } + }, + new ChromeAdapter() + ); expect(genModel.tools?.length).to.equal(1); expect(genModel.toolConfig?.functionCallingConfig?.mode).to.equal( FunctionCallingMode.NONE @@ -204,10 +227,14 @@ describe('GenerativeModel', () => { restore(); }); it('passes text-only systemInstruction through to chat.sendMessage', async () => { - const genModel = new GenerativeModel(fakeVertexAI, { - model: 'my-model', - systemInstruction: 'be friendly' - }); + const genModel = new GenerativeModel( + fakeVertexAI, + { + model: 'my-model', + systemInstruction: 'be friendly' + }, + new ChromeAdapter() + ); expect(genModel.systemInstruction?.parts[0].text).to.equal('be friendly'); const mockResponse = getMockResponse( 'vertexAI', @@ -230,14 +257,20 @@ describe('GenerativeModel', () => { restore(); }); it('startChat overrides model values', async () => { - const genModel = new GenerativeModel(fakeVertexAI, { - model: 'my-model', - tools: [ - { functionDeclarations: [{ name: 'myfunc', description: 'mydesc' }] } - ], - toolConfig: { functionCallingConfig: { mode: FunctionCallingMode.NONE } }, - systemInstruction: { role: 'system', parts: [{ text: 'be friendly' }] } - }); + const genModel = new GenerativeModel( + fakeVertexAI, + { + model: 'my-model', + tools: [ + { functionDeclarations: [{ name: 'myfunc', description: 'mydesc' }] } + ], + toolConfig: { + functionCallingConfig: { mode: FunctionCallingMode.NONE } + }, + 
systemInstruction: { role: 'system', parts: [{ text: 'be friendly' }] } + }, + new ChromeAdapter() + ); expect(genModel.tools?.length).to.equal(1); expect(genModel.toolConfig?.functionCallingConfig?.mode).to.equal( FunctionCallingMode.NONE @@ -282,7 +315,11 @@ describe('GenerativeModel', () => { restore(); }); it('calls countTokens', async () => { - const genModel = new GenerativeModel(fakeVertexAI, { model: 'my-model' }); + const genModel = new GenerativeModel( + fakeVertexAI, + { model: 'my-model' }, + new ChromeAdapter() + ); const mockResponse = getMockResponse( 'vertexAI', 'unary-success-total-tokens.json' diff --git a/packages/vertexai/src/models/generative-model.ts b/packages/vertexai/src/models/generative-model.ts index 32406ea19f7..c58eb3a1497 100644 --- a/packages/vertexai/src/models/generative-model.ts +++ b/packages/vertexai/src/models/generative-model.ts @@ -43,6 +43,7 @@ import { } from '../requests/request-helpers'; import { VertexAI } from '../public-types'; import { VertexAIModel } from './vertexai-model'; +import { ChromeAdapter } from '../methods/chrome-adapter'; /** * Class for generative model APIs. 
@@ -63,6 +64,7 @@ export class GenerativeModel extends VertexAIModel { constructor( vertexAI: VertexAI, modelParams: ModelParams, + private chromeAdapter: ChromeAdapter, requestOptions?: RequestOptions ) { super(vertexAI, modelParams.model); @@ -95,6 +97,7 @@ export class GenerativeModel extends VertexAIModel { systemInstruction: this.systemInstruction, ...formattedParams }, + this.chromeAdapter, this.requestOptions ); } @@ -132,6 +135,7 @@ export class GenerativeModel extends VertexAIModel { return new ChatSession( this._apiSettings, this.model, + this.chromeAdapter, { tools: this.tools, toolConfig: this.toolConfig, From 57c88413b0dce5b6524bf1790915d9603e2dc5e5 Mon Sep 17 00:00:00 2001 From: Erik Eldridge Date: Thu, 17 Apr 2025 18:05:51 -0700 Subject: [PATCH 07/14] VinF Hybrid Inference: Implement ChromeAdapter (rebased) (#8943) --- e2e/sample-apps/modular.js | 7 +- packages/vertexai/src/api.ts | 7 +- .../src/methods/chrome-adapter.test.ts | 310 ++++++++++++++++++ .../vertexai/src/methods/chrome-adapter.ts | 188 ++++++++++- packages/vertexai/src/types/language-model.ts | 10 +- .../changelog-generator/tsconfig.json | 3 +- yarn.lock | 8 +- 7 files changed, 514 insertions(+), 19 deletions(-) create mode 100644 packages/vertexai/src/methods/chrome-adapter.test.ts diff --git a/e2e/sample-apps/modular.js b/e2e/sample-apps/modular.js index 9e943e04494..4c5238d44dc 100644 --- a/e2e/sample-apps/modular.js +++ b/e2e/sample-apps/modular.js @@ -58,7 +58,12 @@ import { onValue, off } from 'firebase/database'; -import { getGenerativeModel, getVertexAI, VertexAI } from 'firebase/vertexai'; +import { + getGenerativeModel, + getVertexAI, + InferenceMode, + VertexAI +} from 'firebase/vertexai'; import { getDataConnect, DataConnect } from 'firebase/data-connect'; /** diff --git a/packages/vertexai/src/api.ts b/packages/vertexai/src/api.ts index 236ca73ce87..2f6de198608 100644 --- a/packages/vertexai/src/api.ts +++ b/packages/vertexai/src/api.ts @@ -31,6 +31,7 @@ import { import { 
VertexAIError } from './errors'; import { VertexAIModel, GenerativeModel, ImagenModel } from './models'; import { ChromeAdapter } from './methods/chrome-adapter'; +import { LanguageModel } from './types/language-model'; export { ChatSession } from './methods/chat-session'; export * from './requests/schema-builder'; @@ -95,7 +96,11 @@ export function getGenerativeModel( return new GenerativeModel( vertexAI, inCloudParams, - new ChromeAdapter(hybridParams.mode, hybridParams.onDeviceParams), + new ChromeAdapter( + window.LanguageModel as LanguageModel, + hybridParams.mode, + hybridParams.onDeviceParams + ), requestOptions ); } diff --git a/packages/vertexai/src/methods/chrome-adapter.test.ts b/packages/vertexai/src/methods/chrome-adapter.test.ts new file mode 100644 index 00000000000..b11fb9c937e --- /dev/null +++ b/packages/vertexai/src/methods/chrome-adapter.test.ts @@ -0,0 +1,310 @@ +/** + * @license + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { expect, use } from 'chai'; +import sinonChai from 'sinon-chai'; +import chaiAsPromised from 'chai-as-promised'; +import { ChromeAdapter } from './chrome-adapter'; +import { + Availability, + LanguageModel, + LanguageModelCreateOptions +} from '../types/language-model'; +import { stub } from 'sinon'; +import { GenerateContentRequest } from '../types'; + +use(sinonChai); +use(chaiAsPromised); + +describe('ChromeAdapter', () => { + describe('isAvailable', () => { + it('returns false if mode is only cloud', async () => { + const adapter = new ChromeAdapter(undefined, 'only_in_cloud'); + expect( + await adapter.isAvailable({ + contents: [] + }) + ).to.be.false; + }); + it('returns false if AI API is undefined', async () => { + const adapter = new ChromeAdapter(undefined, 'prefer_on_device'); + expect( + await adapter.isAvailable({ + contents: [] + }) + ).to.be.false; + }); + it('returns false if LanguageModel API is undefined', async () => { + const adapter = new ChromeAdapter( + {} as LanguageModel, + 'prefer_on_device' + ); + expect( + await adapter.isAvailable({ + contents: [] + }) + ).to.be.false; + }); + it('returns false if request contents empty', async () => { + const adapter = new ChromeAdapter( + {} as LanguageModel, + 'prefer_on_device' + ); + expect( + await adapter.isAvailable({ + contents: [] + }) + ).to.be.false; + }); + it('returns false if request content has function role', async () => { + const adapter = new ChromeAdapter( + {} as LanguageModel, + 'prefer_on_device' + ); + expect( + await adapter.isAvailable({ + contents: [ + { + role: 'function', + parts: [] + } + ] + }) + ).to.be.false; + }); + it('returns false if request content has multiple parts', async () => { + const adapter = new ChromeAdapter( + {} as LanguageModel, + 'prefer_on_device' + ); + expect( + await adapter.isAvailable({ + contents: [ + { + role: 'user', + parts: [{ text: 'a' }, { text: 'b' }] + } + ] + }) + ).to.be.false; + }); + it('returns false if request 
content has non-text part', async () => { + const adapter = new ChromeAdapter( + {} as LanguageModel, + 'prefer_on_device' + ); + expect( + await adapter.isAvailable({ + contents: [ + { + role: 'user', + parts: [{ inlineData: { mimeType: 'a', data: 'b' } }] + } + ] + }) + ).to.be.false; + }); + it('returns false if request system instruction has function role', async () => { + const adapter = new ChromeAdapter( + {} as LanguageModel, + 'prefer_on_device' + ); + expect( + await adapter.isAvailable({ + contents: [], + systemInstruction: { + role: 'function', + parts: [] + } + }) + ).to.be.false; + }); + it('returns false if request system instruction has multiple parts', async () => { + const adapter = new ChromeAdapter( + {} as LanguageModel, + 'prefer_on_device' + ); + expect( + await adapter.isAvailable({ + contents: [], + systemInstruction: { + role: 'function', + parts: [{ text: 'a' }, { text: 'b' }] + } + }) + ).to.be.false; + }); + it('returns false if request system instruction has non-text part', async () => { + const adapter = new ChromeAdapter( + {} as LanguageModel, + 'prefer_on_device' + ); + expect( + await adapter.isAvailable({ + contents: [], + systemInstruction: { + role: 'function', + parts: [{ inlineData: { mimeType: 'a', data: 'b' } }] + } + }) + ).to.be.false; + }); + it('returns true if model is readily available', async () => { + const languageModelProvider = { + availability: () => Promise.resolve(Availability.available) + } as LanguageModel; + const adapter = new ChromeAdapter( + languageModelProvider, + 'prefer_on_device' + ); + expect( + await adapter.isAvailable({ + contents: [{ role: 'user', parts: [{ text: 'hi' }] }] + }) + ).to.be.true; + }); + it('returns false and triggers download when model is available after download', async () => { + const languageModelProvider = { + availability: () => Promise.resolve(Availability.downloadable), + create: () => Promise.resolve({}) + } as LanguageModel; + const createStub = 
stub(languageModelProvider, 'create').resolves( + {} as LanguageModel + ); + const onDeviceParams = {} as LanguageModelCreateOptions; + const adapter = new ChromeAdapter( + languageModelProvider, + 'prefer_on_device', + onDeviceParams + ); + expect( + await adapter.isAvailable({ + contents: [{ role: 'user', parts: [{ text: 'hi' }] }] + }) + ).to.be.false; + expect(createStub).to.have.been.calledOnceWith(onDeviceParams); + }); + it('avoids redundant downloads', async () => { + const languageModelProvider = { + availability: () => Promise.resolve(Availability.downloadable), + create: () => Promise.resolve({}) + } as LanguageModel; + const downloadPromise = new Promise(() => { + /* never resolves */ + }); + const createStub = stub(languageModelProvider, 'create').returns( + downloadPromise + ); + const adapter = new ChromeAdapter(languageModelProvider); + await adapter.isAvailable({ + contents: [{ role: 'user', parts: [{ text: 'hi' }] }] + }); + await adapter.isAvailable({ + contents: [{ role: 'user', parts: [{ text: 'hi' }] }] + }); + expect(createStub).to.have.been.calledOnce; + }); + it('clears state when download completes', async () => { + const languageModelProvider = { + availability: () => Promise.resolve(Availability.downloadable), + create: () => Promise.resolve({}) + } as LanguageModel; + let resolveDownload; + const downloadPromise = new Promise(resolveCallback => { + resolveDownload = resolveCallback; + }); + const createStub = stub(languageModelProvider, 'create').returns( + downloadPromise + ); + const adapter = new ChromeAdapter(languageModelProvider); + await adapter.isAvailable({ + contents: [{ role: 'user', parts: [{ text: 'hi' }] }] + }); + resolveDownload!(); + await adapter.isAvailable({ + contents: [{ role: 'user', parts: [{ text: 'hi' }] }] + }); + expect(createStub).to.have.been.calledTwice; + }); + it('returns false when model is never available', async () => { + const languageModelProvider = { + availability: () => 
Promise.resolve(Availability.unavailable), + create: () => Promise.resolve({}) + } as LanguageModel; + const adapter = new ChromeAdapter( + languageModelProvider, + 'prefer_on_device' + ); + expect( + await adapter.isAvailable({ + contents: [{ role: 'user', parts: [{ text: 'hi' }] }] + }) + ).to.be.false; + }); + }); + describe('generateContentOnDevice', () => { + it('generates content', async () => { + const languageModelProvider = { + create: () => Promise.resolve({}) + } as LanguageModel; + const languageModel = { + prompt: i => Promise.resolve(i) + } as LanguageModel; + const createStub = stub(languageModelProvider, 'create').resolves( + languageModel + ); + const promptOutput = 'hi'; + const promptStub = stub(languageModel, 'prompt').resolves(promptOutput); + const onDeviceParams = { + systemPrompt: 'be yourself' + } as LanguageModelCreateOptions; + const adapter = new ChromeAdapter( + languageModelProvider, + 'prefer_on_device', + onDeviceParams + ); + const request = { + contents: [{ role: 'user', parts: [{ text: 'anything' }] }] + } as GenerateContentRequest; + const response = await adapter.generateContentOnDevice(request); + // Asserts initialization params are proxied. + expect(createStub).to.have.been.calledOnceWith(onDeviceParams); + // Asserts Vertex input type is mapped to Chrome type. + expect(promptStub).to.have.been.calledOnceWith([ + { + role: request.contents[0].role, + content: [ + { + type: 'text', + content: request.contents[0].parts[0].text + } + ] + } + ]); + // Asserts expected output. + expect(await response.json()).to.deep.equal({ + candidates: [ + { + content: { + parts: [{ text: promptOutput }] + } + } + ] + }); + }); + }); +}); diff --git a/packages/vertexai/src/methods/chrome-adapter.ts b/packages/vertexai/src/methods/chrome-adapter.ts index 26ecd55c2da..10844079c03 100644 --- a/packages/vertexai/src/methods/chrome-adapter.ts +++ b/packages/vertexai/src/methods/chrome-adapter.ts @@ -15,37 +15,213 @@ * limitations under the License. 
*/ -import { GenerateContentRequest, InferenceMode } from '../types'; -import { LanguageModelCreateOptions } from '../types/language-model'; +import { + Content, + GenerateContentRequest, + InferenceMode, + Part, + Role +} from '../types'; +import { + Availability, + LanguageModel, + LanguageModelCreateOptions, + LanguageModelMessage, + LanguageModelMessageRole, + LanguageModelMessageContent +} from '../types/language-model'; /** * Defines an inference "backend" that uses Chrome's on-device model, * and encapsulates logic for detecting when on-device is possible. */ export class ChromeAdapter { + private isDownloading = false; + private downloadPromise: Promise | undefined; + private oldSession: LanguageModel | undefined; constructor( + private languageModelProvider?: LanguageModel, private mode?: InferenceMode, private onDeviceParams?: LanguageModelCreateOptions ) {} - // eslint-disable-next-line @typescript-eslint/no-unused-vars + + /** + * Checks if a given request can be made on-device. + * + *
    Encapsulates a few concerns: + *
  1. the mode + *
  2. API existence + *
  3. prompt formatting + *
  4. model availability, including triggering download if necessary
+ * + *

Pros: callers needn't be concerned with details of on-device availability.

+ *

Cons: this method spans a few concerns and splits request validation from usage. + * If instance variables weren't already part of the API, we could consider a better + * separation of concerns.

+ */ async isAvailable(request: GenerateContentRequest): Promise { - return false; + // Returns false if we should only use in-cloud inference. + if (this.mode === 'only_in_cloud') { + return false; + } + // Returns false if the on-device inference API is undefined. + if (!this.languageModelProvider) { + return false; + } + // Returns false if the request can't be run on-device. + if (!ChromeAdapter.isOnDeviceRequest(request)) { + return false; + } + const availability = await this.languageModelProvider.availability(); + switch (availability) { + case Availability.available: + // Returns true only if a model is immediately available. + return true; + case Availability.downloadable: + // Triggers async download if model is downloadable. + this.download(); + default: + return false; + } } + + /** + * Generates content on device. + * + *

This is comparable to {@link GenerativeModel.generateContent} for generating content in + * Cloud.

+ * @param request a standard Vertex {@link GenerateContentRequest} + * @returns {@link Response}, so we can reuse common response formatting. + */ async generateContentOnDevice( - // eslint-disable-next-line @typescript-eslint/no-unused-vars request: GenerateContentRequest ): Promise { + const session = await this.createSession( + // TODO: normalize on-device params during construction. + this.onDeviceParams || {} + ); + const messages = ChromeAdapter.toLanguageModelMessages(request.contents); + const text = await session.prompt(messages); return { json: () => Promise.resolve({ candidates: [ { content: { - parts: [{ text: '' }] + parts: [{ text }] } } ] }) } as Response; } + + /** + * Asserts inference for the given request can be performed by an on-device model. + */ + private static isOnDeviceRequest(request: GenerateContentRequest): boolean { + // Returns false if the prompt is empty. + if (request.contents.length === 0) { + return false; + } + + // Applies the same checks as above, but for each content item. + for (const content of request.contents) { + if (content.role === 'function') { + return false; + } + + if (content.parts.length > 1) { + return false; + } + + if (!content.parts[0].text) { + return false; + } + } + + return true; + } + + /** + * Triggers the download of an on-device model. + * + *

Chrome only downloads models as needed. Chrome knows a model is needed when code calls + * LanguageModel.create.

+ * + *

Since Chrome manages the download, the SDK can only avoid redundant download requests by + * tracking if a download has previously been requested.

+ */ + private download(): void { + if (this.isDownloading) { + return; + } + this.isDownloading = true; + this.downloadPromise = this.languageModelProvider + ?.create(this.onDeviceParams) + .then(() => { + this.isDownloading = false; + }); + } + + /** + * Converts a Vertex role string to a Chrome role string. + */ + private static toOnDeviceRole(role: Role): LanguageModelMessageRole { + return role === 'model' ? 'assistant' : 'user'; + } + + /** + * Converts a Vertex Content object to a Chrome LanguageModelMessage object. + */ + private static toLanguageModelMessages( + contents: Content[] + ): LanguageModelMessage[] { + return contents.map(c => ({ + role: ChromeAdapter.toOnDeviceRole(c.role), + content: c.parts.map(ChromeAdapter.toLanguageModelMessageContent) + })); + } + + /** + * Converts a Vertex Part object to a Chrome LanguageModelMessageContent object. + */ + private static toLanguageModelMessageContent( + part: Part + ): LanguageModelMessageContent { + if (part.text) { + return { + type: 'text', + content: part.text + }; + } + // Assumes contents have been verified to contain only a single TextPart. + // TODO: support other input types + throw new Error('Not yet implemented'); + } + + /** + * Abstracts Chrome session creation. + * + *

Chrome uses a multi-turn session for all inference. Vertex uses single-turn for all + * inference. To map the Vertex API to Chrome's API, the SDK creates a new session for all + * inference.

+ * + *

Chrome will remove a model from memory if it's no longer in use, so this method ensures a + * new session is created before an old session is destroyed.

+ */ + private async createSession( + // TODO: define a default value, since these are optional. + options: LanguageModelCreateOptions + ): Promise { + // TODO: could we use this.onDeviceParams instead of passing in options? + const newSession = await this.languageModelProvider!.create(options); + if (this.oldSession) { + this.oldSession.destroy(); + } + // Holds session reference, so model isn't unloaded from memory. + this.oldSession = newSession; + return newSession; + } } diff --git a/packages/vertexai/src/types/language-model.ts b/packages/vertexai/src/types/language-model.ts index e564ca467b4..88354d0aeec 100644 --- a/packages/vertexai/src/types/language-model.ts +++ b/packages/vertexai/src/types/language-model.ts @@ -32,7 +32,7 @@ export interface LanguageModel extends EventTarget { ): Promise; destroy(): undefined; } -enum Availability { +export enum Availability { 'unavailable', 'downloadable', 'downloading', @@ -56,14 +56,14 @@ interface LanguageModelExpectedInput { type: LanguageModelMessageType; languages?: string[]; } -type LanguageModelPrompt = +export type LanguageModelPrompt = | LanguageModelMessage[] | LanguageModelMessageShorthand[] | string; type LanguageModelInitialPrompts = | LanguageModelMessage[] | LanguageModelMessageShorthand[]; -interface LanguageModelMessage { +export interface LanguageModelMessage { role: LanguageModelMessageRole; content: LanguageModelMessageContent[]; } @@ -71,11 +71,11 @@ interface LanguageModelMessageShorthand { role: LanguageModelMessageRole; content: string; } -interface LanguageModelMessageContent { +export interface LanguageModelMessageContent { type: LanguageModelMessageType; content: LanguageModelMessageContentValue; } -type LanguageModelMessageRole = 'system' | 'user' | 'assistant'; +export type LanguageModelMessageRole = 'system' | 'user' | 'assistant'; type LanguageModelMessageType = 'text' | 'image' | 'audio'; type LanguageModelMessageContentValue = | ImageBitmapSource diff --git 
a/repo-scripts/changelog-generator/tsconfig.json b/repo-scripts/changelog-generator/tsconfig.json index 38bdb7035e4..cffe622284d 100644 --- a/repo-scripts/changelog-generator/tsconfig.json +++ b/repo-scripts/changelog-generator/tsconfig.json @@ -3,7 +3,8 @@ "strict": true, "outDir": "dist", "lib": [ - "ESNext" + "ESNext", + "dom" ], "module": "CommonJS", "moduleResolution": "node", diff --git a/yarn.lock b/yarn.lock index 51ede769d03..d5ea91a7093 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2938,11 +2938,9 @@ "@types/node" "*" "@types/cors@^2.8.12": - version "2.8.17" - resolved "https://registry.npmjs.org/@types/cors/-/cors-2.8.17.tgz#5d718a5e494a8166f569d986794e49c48b216b2b" - integrity sha512-8CGDvrBj1zgo2qE+oS3pOCyYNqCPryMWY2bGfwA0dcfopWGgxs+78df0Rs3rc9THP4JkOhLsAa+15VdpAqkcUA== - dependencies: - "@types/node" "*" + version "2.8.12" + resolved "https://registry.npmjs.org/@types/cors/-/cors-2.8.12.tgz" + integrity sha512-vt+kDhq/M2ayberEtJcIN/hxXy1Pk+59g2FV/ZQceeaTyCtCucjL2Q7FXlFjtWn4n15KCr1NE2lNNFhp0lEThw== "@types/deep-eql@*": version "4.0.2" From 5d4c7a0785a16e9cc24642e1ecae7c955730a6b2 Mon Sep 17 00:00:00 2001 From: gsiddh <92327772+gsiddh@users.noreply.github.com> Date: Fri, 18 Apr 2025 15:59:01 -0700 Subject: [PATCH 08/14] Adding count token impl (#8950) --- .../src/methods/chrome-adapter.test.ts | 52 +++++++++++++++++++ .../vertexai/src/methods/chrome-adapter.ts | 16 ++++++ .../vertexai/src/methods/count-tokens.test.ts | 37 +++++++++++-- packages/vertexai/src/methods/count-tokens.ts | 17 +++++- .../vertexai/src/models/generative-model.ts | 7 ++- 5 files changed, 123 insertions(+), 6 deletions(-) diff --git a/packages/vertexai/src/methods/chrome-adapter.test.ts b/packages/vertexai/src/methods/chrome-adapter.test.ts index b11fb9c937e..cce97b25f5a 100644 --- a/packages/vertexai/src/methods/chrome-adapter.test.ts +++ b/packages/vertexai/src/methods/chrome-adapter.test.ts @@ -307,4 +307,56 @@ describe('ChromeAdapter', () => { }); }); }); + 
describe('countTokens', () => { + it('counts tokens from a singular input', async () => { + const inputText = 'first'; + const expectedCount = 10; + const onDeviceParams = { + systemPrompt: 'be yourself' + } as LanguageModelCreateOptions; + + // setting up stubs + const languageModelProvider = { + create: () => Promise.resolve({}) + } as LanguageModel; + const languageModel = { + measureInputUsage: _i => Promise.resolve(123) + } as LanguageModel; + const createStub = stub(languageModelProvider, 'create').resolves( + languageModel + ); + // overrides impl with stub method + const measureInputUsageStub = stub( + languageModel, + 'measureInputUsage' + ).resolves(expectedCount); + + const adapter = new ChromeAdapter( + languageModelProvider, + 'prefer_on_device', + onDeviceParams + ); + const countTokenRequest = { + contents: [{ role: 'user', parts: [{ text: inputText }] }] + } as GenerateContentRequest; + const response = await adapter.countTokens(countTokenRequest); + // Asserts initialization params are proxied. + expect(createStub).to.have.been.calledOnceWith(onDeviceParams); + // Asserts Vertex input type is mapped to Chrome type. + expect(measureInputUsageStub).to.have.been.calledOnceWith([ + { + role: 'user', + content: [ + { + type: 'text', + content: inputText + } + ] + } + ]); + expect(await response.json()).to.deep.equal({ + totalTokens: expectedCount + }); + }); + }); }); diff --git a/packages/vertexai/src/methods/chrome-adapter.ts b/packages/vertexai/src/methods/chrome-adapter.ts index 10844079c03..225d2bd581d 100644 --- a/packages/vertexai/src/methods/chrome-adapter.ts +++ b/packages/vertexai/src/methods/chrome-adapter.ts @@ -17,6 +17,7 @@ import { Content, + CountTokensRequest, GenerateContentRequest, InferenceMode, Part, @@ -117,6 +118,21 @@ export class ChromeAdapter { } as Response; } + async countTokens(request: CountTokensRequest): Promise { + // TODO: Check if the request contains an image, and if so, throw. 
+ const session = await this.createSession( + // TODO: normalize on-device params during construction. + this.onDeviceParams || {} + ); + const messages = ChromeAdapter.toLanguageModelMessages(request.contents); + const tokenCount = await session.measureInputUsage(messages); + return { + json: async () => ({ + totalTokens: tokenCount + }) + } as Response; + } + /** * Asserts inference for the given request can be performed by an on-device model. */ diff --git a/packages/vertexai/src/methods/count-tokens.test.ts b/packages/vertexai/src/methods/count-tokens.test.ts index 9eccbf702fe..f77d2a36c2f 100644 --- a/packages/vertexai/src/methods/count-tokens.test.ts +++ b/packages/vertexai/src/methods/count-tokens.test.ts @@ -25,6 +25,7 @@ import { countTokens } from './count-tokens'; import { CountTokensRequest } from '../types'; import { ApiSettings } from '../types/internal'; import { Task } from '../requests/request'; +import { ChromeAdapter } from './chrome-adapter'; use(sinonChai); use(chaiAsPromised); @@ -55,7 +56,8 @@ describe('countTokens()', () => { const result = await countTokens( fakeApiSettings, 'model', - fakeRequestParams + fakeRequestParams, + new ChromeAdapter() ); expect(result.totalTokens).to.equal(6); expect(result.totalBillableCharacters).to.equal(16); @@ -81,7 +83,8 @@ describe('countTokens()', () => { const result = await countTokens( fakeApiSettings, 'model', - fakeRequestParams + fakeRequestParams, + new ChromeAdapter() ); expect(result.totalTokens).to.equal(1837); expect(result.totalBillableCharacters).to.equal(117); @@ -109,7 +112,8 @@ describe('countTokens()', () => { const result = await countTokens( fakeApiSettings, 'model', - fakeRequestParams + fakeRequestParams, + new ChromeAdapter() ); expect(result.totalTokens).to.equal(258); expect(result).to.not.have.property('totalBillableCharacters'); @@ -135,8 +139,33 @@ describe('countTokens()', () => { json: mockResponse.json } as Response); await expect( - countTokens(fakeApiSettings, 'model', 
fakeRequestParams) + countTokens( + fakeApiSettings, + 'model', + fakeRequestParams, + new ChromeAdapter() + ) ).to.be.rejectedWith(/404.*not found/); expect(mockFetch).to.be.called; }); + it('on-device', async () => { + const chromeAdapter = new ChromeAdapter(); + const isAvailableStub = stub(chromeAdapter, 'isAvailable').resolves(true); + const mockResponse = getMockResponse( + 'vertexAI', + 'unary-success-total-tokens.json' + ); + const countTokensStub = stub(chromeAdapter, 'countTokens').resolves( + mockResponse as Response + ); + const result = await countTokens( + fakeApiSettings, + 'model', + fakeRequestParams, + chromeAdapter + ); + expect(result.totalTokens).eq(6); + expect(isAvailableStub).to.be.called; + expect(countTokensStub).to.be.calledWith(fakeRequestParams); + }); }); diff --git a/packages/vertexai/src/methods/count-tokens.ts b/packages/vertexai/src/methods/count-tokens.ts index c9d43a5b6fd..108a4b9ee6c 100644 --- a/packages/vertexai/src/methods/count-tokens.ts +++ b/packages/vertexai/src/methods/count-tokens.ts @@ -22,8 +22,9 @@ import { } from '../types'; import { Task, makeRequest } from '../requests/request'; import { ApiSettings } from '../types/internal'; +import { ChromeAdapter } from './chrome-adapter'; -export async function countTokens( +export async function countTokensOnCloud( apiSettings: ApiSettings, model: string, params: CountTokensRequest, @@ -39,3 +40,17 @@ export async function countTokens( ); return response.json(); } + +export async function countTokens( + apiSettings: ApiSettings, + model: string, + params: CountTokensRequest, + chromeAdapter: ChromeAdapter, + requestOptions?: RequestOptions +): Promise { + if (await chromeAdapter.isAvailable(params)) { + return (await chromeAdapter.countTokens(params)).json(); + } + + return countTokensOnCloud(apiSettings, model, params, requestOptions); +} diff --git a/packages/vertexai/src/models/generative-model.ts b/packages/vertexai/src/models/generative-model.ts index 
c58eb3a1497..0f7e408282c 100644 --- a/packages/vertexai/src/models/generative-model.ts +++ b/packages/vertexai/src/models/generative-model.ts @@ -153,6 +153,11 @@ export class GenerativeModel extends VertexAIModel { request: CountTokensRequest | string | Array ): Promise { const formattedParams = formatGenerateContentInput(request); - return countTokens(this._apiSettings, this.model, formattedParams); + return countTokens( + this._apiSettings, + this.model, + formattedParams, + this.chromeAdapter + ); } } From 58d6d1fb59d3431c995939319c0003f3fded0941 Mon Sep 17 00:00:00 2001 From: Erik Eldridge Date: Fri, 18 Apr 2025 17:53:11 -0700 Subject: [PATCH 09/14] VinF Hybrid Inference #4: ChromeAdapter in stream methods (rebased) (#8949) --- e2e/sample-apps/modular.js | 16 ++-- packages/vertexai/src/methods/chat-session.ts | 1 + .../src/methods/chrome-adapter.test.ts | 71 ++++++++++++++++- .../vertexai/src/methods/chrome-adapter.ts | 78 +++++++++++++++---- .../src/methods/generate-content.test.ts | 8 +- .../vertexai/src/methods/generate-content.ts | 28 ++++++- .../vertexai/src/models/generative-model.ts | 1 + 7 files changed, 172 insertions(+), 31 deletions(-) diff --git a/e2e/sample-apps/modular.js b/e2e/sample-apps/modular.js index 4c5238d44dc..e3170bb3c57 100644 --- a/e2e/sample-apps/modular.js +++ b/e2e/sample-apps/modular.js @@ -58,12 +58,7 @@ import { onValue, off } from 'firebase/database'; -import { - getGenerativeModel, - getVertexAI, - InferenceMode, - VertexAI -} from 'firebase/vertexai'; +import { getGenerativeModel, getVertexAI } from 'firebase/vertexai'; import { getDataConnect, DataConnect } from 'firebase/data-connect'; /** @@ -318,8 +313,13 @@ function callPerformance(app) { async function callVertexAI(app) { console.log('[VERTEXAI] start'); const vertexAI = getVertexAI(app); - const model = getGenerativeModel(vertexAI, { model: 'gemini-1.5-flash' }); - const result = await model.countTokens('abcdefg'); + const model = getGenerativeModel(vertexAI, { + mode: 
'prefer_in_cloud' + }); + const result = await model.generateContentStream("What is Roko's Basalisk?"); + for await (const chunk of result.stream) { + console.log(chunk.text()); + } console.log(`[VERTEXAI] counted tokens: ${result.totalTokens}`); } diff --git a/packages/vertexai/src/methods/chat-session.ts b/packages/vertexai/src/methods/chat-session.ts index 4188872cff7..112ddf5857e 100644 --- a/packages/vertexai/src/methods/chat-session.ts +++ b/packages/vertexai/src/methods/chat-session.ts @@ -149,6 +149,7 @@ export class ChatSession { this._apiSettings, this.model, generateContentRequest, + this.chromeAdapter, this.requestOptions ); diff --git a/packages/vertexai/src/methods/chrome-adapter.test.ts b/packages/vertexai/src/methods/chrome-adapter.test.ts index cce97b25f5a..a18812374c0 100644 --- a/packages/vertexai/src/methods/chrome-adapter.test.ts +++ b/packages/vertexai/src/methods/chrome-adapter.test.ts @@ -30,6 +30,25 @@ import { GenerateContentRequest } from '../types'; use(sinonChai); use(chaiAsPromised); +/** + * Converts the ReadableStream from response.body to an array of strings. + */ +async function toStringArray( + stream: ReadableStream +): Promise { + const decoder = new TextDecoder(); + const actual = []; + const reader = stream.getReader(); + while (true) { + const { done, value } = await reader.read(); + if (done) { + break; + } + actual.push(decoder.decode(value)); + } + return actual; +} + describe('ChromeAdapter', () => { describe('isAvailable', () => { it('returns false if mode is only cloud', async () => { @@ -280,7 +299,7 @@ describe('ChromeAdapter', () => { const request = { contents: [{ role: 'user', parts: [{ text: 'anything' }] }] } as GenerateContentRequest; - const response = await adapter.generateContentOnDevice(request); + const response = await adapter.generateContent(request); // Asserts initialization params are proxied. 
expect(createStub).to.have.been.calledOnceWith(onDeviceParams); // Asserts Vertex input type is mapped to Chrome type. @@ -325,6 +344,7 @@ describe('ChromeAdapter', () => { const createStub = stub(languageModelProvider, 'create').resolves( languageModel ); + // overrides impl with stub method const measureInputUsageStub = stub( languageModel, @@ -336,6 +356,7 @@ describe('ChromeAdapter', () => { 'prefer_on_device', onDeviceParams ); + const countTokenRequest = { contents: [{ role: 'user', parts: [{ text: inputText }] }] } as GenerateContentRequest; @@ -359,4 +380,52 @@ describe('ChromeAdapter', () => { }); }); }); + describe('generateContentStreamOnDevice', () => { + it('generates content stream', async () => { + const languageModelProvider = { + create: () => Promise.resolve({}) + } as LanguageModel; + const languageModel = { + promptStreaming: _i => new ReadableStream() + } as LanguageModel; + const createStub = stub(languageModelProvider, 'create').resolves( + languageModel + ); + const part = 'hi'; + const promptStub = stub(languageModel, 'promptStreaming').returns( + new ReadableStream({ + start(controller) { + controller.enqueue([part]); + controller.close(); + } + }) + ); + const onDeviceParams = {} as LanguageModelCreateOptions; + const adapter = new ChromeAdapter( + languageModelProvider, + 'prefer_on_device', + onDeviceParams + ); + const request = { + contents: [{ role: 'user', parts: [{ text: 'anything' }] }] + } as GenerateContentRequest; + const response = await adapter.generateContentStream(request); + expect(createStub).to.have.been.calledOnceWith(onDeviceParams); + expect(promptStub).to.have.been.calledOnceWith([ + { + role: request.contents[0].role, + content: [ + { + type: 'text', + content: request.contents[0].parts[0].text + } + ] + } + ]); + const actual = await toStringArray(response.body!); + expect(actual).to.deep.equal([ + `data: {"candidates":[{"content":{"role":"model","parts":[{"text":["${part}"]}]}}]}\n\n` + ]); + }); + }); }); diff 
--git a/packages/vertexai/src/methods/chrome-adapter.ts b/packages/vertexai/src/methods/chrome-adapter.ts index 225d2bd581d..dcdb38b7fd8 100644 --- a/packages/vertexai/src/methods/chrome-adapter.ts +++ b/packages/vertexai/src/methods/chrome-adapter.ts @@ -95,7 +95,25 @@ export class ChromeAdapter { * @param request a standard Vertex {@link GenerateContentRequest} * @returns {@link Response}, so we can reuse common response formatting. */ - async generateContentOnDevice( + async generateContent(request: GenerateContentRequest): Promise { + const session = await this.createSession( + // TODO: normalize on-device params during construction. + this.onDeviceParams || {} + ); + const messages = ChromeAdapter.toLanguageModelMessages(request.contents); + const text = await session.prompt(messages); + return ChromeAdapter.toResponse(text); + } + + /** + * Generates content stream on device. + * + *

This is comparable to {@link GenerativeModel.generateContentStream} for generating content in + * Cloud.

+ * @param request a standard Vertex {@link GenerateContentRequest} + * @returns {@link Response}, so we can reuse common response formatting. + */ + async generateContentStream( request: GenerateContentRequest ): Promise { const session = await this.createSession( @@ -103,19 +121,8 @@ export class ChromeAdapter { this.onDeviceParams || {} ); const messages = ChromeAdapter.toLanguageModelMessages(request.contents); - const text = await session.prompt(messages); - return { - json: () => - Promise.resolve({ - candidates: [ - { - content: { - parts: [{ text }] - } - } - ] - }) - } as Response; + const stream = await session.promptStreaming(messages); + return ChromeAdapter.toStreamResponse(stream); } async countTokens(request: CountTokensRequest): Promise { @@ -240,4 +247,47 @@ export class ChromeAdapter { this.oldSession = newSession; return newSession; } + + /** + * Formats string returned by Chrome as a {@link Response} returned by Vertex. + */ + private static toResponse(text: string): Response { + return { + json: async () => ({ + candidates: [ + { + content: { + parts: [{ text }] + } + } + ] + }) + } as Response; + } + + /** + * Formats string stream returned by Chrome as SSE returned by Vertex. 
+ */ + private static toStreamResponse(stream: ReadableStream): Response { + const encoder = new TextEncoder(); + return { + body: stream.pipeThrough( + new TransformStream({ + transform(chunk, controller) { + const json = JSON.stringify({ + candidates: [ + { + content: { + role: 'model', + parts: [{ text: chunk }] + } + } + ] + }); + controller.enqueue(encoder.encode(`data: ${json}\n\n`)); + } + }) + ) + } as Response; + } } diff --git a/packages/vertexai/src/methods/generate-content.test.ts b/packages/vertexai/src/methods/generate-content.test.ts index f714ec4d535..19c32941090 100644 --- a/packages/vertexai/src/methods/generate-content.test.ts +++ b/packages/vertexai/src/methods/generate-content.test.ts @@ -308,6 +308,7 @@ describe('generateContent()', () => { ); expect(mockFetch).to.be.called; }); + // TODO: define a similar test for generateContentStream it('on-device', async () => { const chromeAdapter = new ChromeAdapter(); const isAvailableStub = stub(chromeAdapter, 'isAvailable').resolves(true); @@ -315,10 +316,9 @@ describe('generateContent()', () => { 'vertexAI', 'unary-success-basic-reply-short.json' ); - const generateContentStub = stub( - chromeAdapter, - 'generateContentOnDevice' - ).resolves(mockResponse as Response); + const generateContentStub = stub(chromeAdapter, 'generateContent').resolves( + mockResponse as Response + ); const result = await generateContent( fakeApiSettings, 'model', diff --git a/packages/vertexai/src/methods/generate-content.ts b/packages/vertexai/src/methods/generate-content.ts index ba7a162aa9c..1dc5918516e 100644 --- a/packages/vertexai/src/methods/generate-content.ts +++ b/packages/vertexai/src/methods/generate-content.ts @@ -28,13 +28,13 @@ import { processStream } from '../requests/stream-reader'; import { ApiSettings } from '../types/internal'; import { ChromeAdapter } from './chrome-adapter'; -export async function generateContentStream( +async function generateContentStreamOnCloud( apiSettings: ApiSettings, model: 
string, params: GenerateContentRequest, requestOptions?: RequestOptions -): Promise { - const response = await makeRequest( +): Promise { + return makeRequest( model, Task.STREAM_GENERATE_CONTENT, apiSettings, @@ -42,6 +42,26 @@ export async function generateContentStream( JSON.stringify(params), requestOptions ); +} + +export async function generateContentStream( + apiSettings: ApiSettings, + model: string, + params: GenerateContentRequest, + chromeAdapter: ChromeAdapter, + requestOptions?: RequestOptions +): Promise { + let response; + if (await chromeAdapter.isAvailable(params)) { + response = await chromeAdapter.generateContentStream(params); + } else { + response = await generateContentStreamOnCloud( + apiSettings, + model, + params, + requestOptions + ); + } return processStream(response); } @@ -70,7 +90,7 @@ export async function generateContent( ): Promise { let response; if (await chromeAdapter.isAvailable(params)) { - response = await chromeAdapter.generateContentOnDevice(params); + response = await chromeAdapter.generateContent(params); } else { response = await generateContentOnCloud( apiSettings, diff --git a/packages/vertexai/src/models/generative-model.ts b/packages/vertexai/src/models/generative-model.ts index 0f7e408282c..81856819312 100644 --- a/packages/vertexai/src/models/generative-model.ts +++ b/packages/vertexai/src/models/generative-model.ts @@ -123,6 +123,7 @@ export class GenerativeModel extends VertexAIModel { systemInstruction: this.systemInstruction, ...formattedParams }, + this.chromeAdapter, this.requestOptions ); } From ee81092030d7e8b7e526ca508e77b6a9dd19bbec Mon Sep 17 00:00:00 2001 From: Erik Eldridge Date: Mon, 21 Apr 2025 15:16:43 -0700 Subject: [PATCH 10/14] Define values for Availability enum (#8951) --- packages/vertexai/src/types/language-model.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/vertexai/src/types/language-model.ts b/packages/vertexai/src/types/language-model.ts index 
88354d0aeec..b912a1cde21 100644 --- a/packages/vertexai/src/types/language-model.ts +++ b/packages/vertexai/src/types/language-model.ts @@ -33,10 +33,10 @@ export interface LanguageModel extends EventTarget { destroy(): undefined; } export enum Availability { - 'unavailable', - 'downloadable', - 'downloading', - 'available' + 'unavailable' = 'unavailable', + 'downloadable' = 'downloadable', + 'downloading' = 'downloading', + 'available' = 'available' } export interface LanguageModelCreateCoreOptions { topK?: number; From eb1dcb493cffba705cccc577396ae8c37605c116 Mon Sep 17 00:00:00 2001 From: Erik Eldridge Date: Mon, 21 Apr 2025 19:43:17 -0700 Subject: [PATCH 11/14] VinF Hybrid Inference: narrow Chrome input type (#8953) --- e2e/sample-apps/modular.js | 35 ++++++------ .../src/methods/chrome-adapter.test.ts | 51 ++++-------------- .../vertexai/src/methods/chrome-adapter.ts | 53 +++++++------------ packages/vertexai/src/types/language-model.ts | 10 ++-- 4 files changed, 52 insertions(+), 97 deletions(-) diff --git a/e2e/sample-apps/modular.js b/e2e/sample-apps/modular.js index e3170bb3c57..f8b2295768a 100644 --- a/e2e/sample-apps/modular.js +++ b/e2e/sample-apps/modular.js @@ -314,13 +314,14 @@ async function callVertexAI(app) { console.log('[VERTEXAI] start'); const vertexAI = getVertexAI(app); const model = getGenerativeModel(vertexAI, { - mode: 'prefer_in_cloud' + mode: 'prefer_on_device' }); - const result = await model.generateContentStream("What is Roko's Basalisk?"); - for await (const chunk of result.stream) { - console.log(chunk.text()); - } - console.log(`[VERTEXAI] counted tokens: ${result.totalTokens}`); + const singleResult = await model.generateContent([ + { text: 'describe the following:' }, + { text: 'the mojave desert' } + ]); + console.log(`Generated text: ${singleResult.response.text()}`); + console.log(`[VERTEXAI] end`); } /** @@ -346,18 +347,18 @@ async function main() { const app = initializeApp(config); setLogLevel('warn'); - callAppCheck(app); 
- await authLogin(app); - await callStorage(app); - await callFirestore(app); - await callDatabase(app); - await callMessaging(app); - callAnalytics(app); - callPerformance(app); - await callFunctions(app); + // callAppCheck(app); + // await authLogin(app); + // await callStorage(app); + // await callFirestore(app); + // await callDatabase(app); + // await callMessaging(app); + // callAnalytics(app); + // callPerformance(app); + // await callFunctions(app); await callVertexAI(app); - callDataConnect(app); - await authLogout(app); + // callDataConnect(app); + // await authLogout(app); console.log('DONE'); } diff --git a/packages/vertexai/src/methods/chrome-adapter.test.ts b/packages/vertexai/src/methods/chrome-adapter.test.ts index a18812374c0..28a50ceb5eb 100644 --- a/packages/vertexai/src/methods/chrome-adapter.test.ts +++ b/packages/vertexai/src/methods/chrome-adapter.test.ts @@ -22,7 +22,8 @@ import { ChromeAdapter } from './chrome-adapter'; import { Availability, LanguageModel, - LanguageModelCreateOptions + LanguageModelCreateOptions, + LanguageModelMessageContent } from '../types/language-model'; import { stub } from 'sinon'; import { GenerateContentRequest } from '../types'; @@ -105,22 +106,6 @@ describe('ChromeAdapter', () => { }) ).to.be.false; }); - it('returns false if request content has multiple parts', async () => { - const adapter = new ChromeAdapter( - {} as LanguageModel, - 'prefer_on_device' - ); - expect( - await adapter.isAvailable({ - contents: [ - { - role: 'user', - parts: [{ text: 'a' }, { text: 'b' }] - } - ] - }) - ).to.be.false; - }); it('returns false if request content has non-text part', async () => { const adapter = new ChromeAdapter( {} as LanguageModel, @@ -281,7 +266,8 @@ describe('ChromeAdapter', () => { create: () => Promise.resolve({}) } as LanguageModel; const languageModel = { - prompt: i => Promise.resolve(i) + // eslint-disable-next-line @typescript-eslint/no-unused-vars + prompt: (p: LanguageModelMessageContent[]) => 
Promise.resolve('') } as LanguageModel; const createStub = stub(languageModelProvider, 'create').resolves( languageModel @@ -305,13 +291,8 @@ describe('ChromeAdapter', () => { // Asserts Vertex input type is mapped to Chrome type. expect(promptStub).to.have.been.calledOnceWith([ { - role: request.contents[0].role, - content: [ - { - type: 'text', - content: request.contents[0].parts[0].text - } - ] + type: 'text', + content: request.contents[0].parts[0].text } ]); // Asserts expected output. @@ -366,13 +347,8 @@ describe('ChromeAdapter', () => { // Asserts Vertex input type is mapped to Chrome type. expect(measureInputUsageStub).to.have.been.calledOnceWith([ { - role: 'user', - content: [ - { - type: 'text', - content: inputText - } - ] + type: 'text', + content: inputText } ]); expect(await response.json()).to.deep.equal({ @@ -380,7 +356,7 @@ describe('ChromeAdapter', () => { }); }); }); - describe('generateContentStreamOnDevice', () => { + describe('generateContentStream', () => { it('generates content stream', async () => { const languageModelProvider = { create: () => Promise.resolve({}) @@ -413,13 +389,8 @@ describe('ChromeAdapter', () => { expect(createStub).to.have.been.calledOnceWith(onDeviceParams); expect(promptStub).to.have.been.calledOnceWith([ { - role: request.contents[0].role, - content: [ - { - type: 'text', - content: request.contents[0].parts[0].text - } - ] + type: 'text', + content: request.contents[0].parts[0].text } ]); const actual = await toStringArray(response.body!); diff --git a/packages/vertexai/src/methods/chrome-adapter.ts b/packages/vertexai/src/methods/chrome-adapter.ts index dcdb38b7fd8..2d18311f4e0 100644 --- a/packages/vertexai/src/methods/chrome-adapter.ts +++ b/packages/vertexai/src/methods/chrome-adapter.ts @@ -16,19 +16,15 @@ */ import { - Content, CountTokensRequest, GenerateContentRequest, InferenceMode, - Part, - Role + Part } from '../types'; import { Availability, LanguageModel, LanguageModelCreateOptions, - 
LanguageModelMessage, - LanguageModelMessageRole, LanguageModelMessageContent } from '../types/language-model'; @@ -100,8 +96,12 @@ export class ChromeAdapter { // TODO: normalize on-device params during construction. this.onDeviceParams || {} ); - const messages = ChromeAdapter.toLanguageModelMessages(request.contents); - const text = await session.prompt(messages); + // TODO: support multiple content objects when Chrome supports + // sequence + const contents = request.contents[0].parts.map( + ChromeAdapter.toLanguageModelMessageContent + ); + const text = await session.prompt(contents); return ChromeAdapter.toResponse(text); } @@ -120,8 +120,12 @@ export class ChromeAdapter { // TODO: normalize on-device params during construction. this.onDeviceParams || {} ); - const messages = ChromeAdapter.toLanguageModelMessages(request.contents); - const stream = await session.promptStreaming(messages); + // TODO: support multiple content objects when Chrome supports + // sequence + const contents = request.contents[0].parts.map( + ChromeAdapter.toLanguageModelMessageContent + ); + const stream = await session.promptStreaming(contents); return ChromeAdapter.toStreamResponse(stream); } @@ -131,8 +135,12 @@ export class ChromeAdapter { // TODO: normalize on-device params during construction. 
this.onDeviceParams || {} ); - const messages = ChromeAdapter.toLanguageModelMessages(request.contents); - const tokenCount = await session.measureInputUsage(messages); + // TODO: support multiple content objects when Chrome supports + // sequence + const contents = request.contents[0].parts.map( + ChromeAdapter.toLanguageModelMessageContent + ); + const tokenCount = await session.measureInputUsage(contents); return { json: async () => ({ totalTokens: tokenCount @@ -155,10 +163,6 @@ export class ChromeAdapter { return false; } - if (content.parts.length > 1) { - return false; - } - if (!content.parts[0].text) { return false; } @@ -188,25 +192,6 @@ export class ChromeAdapter { }); } - /** - * Converts a Vertex role string to a Chrome role string. - */ - private static toOnDeviceRole(role: Role): LanguageModelMessageRole { - return role === 'model' ? 'assistant' : 'user'; - } - - /** - * Converts a Vertex Content object to a Chrome LanguageModelMessage object. - */ - private static toLanguageModelMessages( - contents: Content[] - ): LanguageModelMessage[] { - return contents.map(c => ({ - role: ChromeAdapter.toOnDeviceRole(c.role), - content: c.parts.map(ChromeAdapter.toLanguageModelMessageContent) - })); - } - /** * Converts a Vertex Part object to a Chrome LanguageModelMessageContent object. */ diff --git a/packages/vertexai/src/types/language-model.ts b/packages/vertexai/src/types/language-model.ts index b912a1cde21..cd84f22dbdb 100644 --- a/packages/vertexai/src/types/language-model.ts +++ b/packages/vertexai/src/types/language-model.ts @@ -56,14 +56,12 @@ interface LanguageModelExpectedInput { type: LanguageModelMessageType; languages?: string[]; } -export type LanguageModelPrompt = - | LanguageModelMessage[] - | LanguageModelMessageShorthand[] - | string; +// TODO: revert to type from Prompt API explainer once it's supported. 
+export type LanguageModelPrompt = LanguageModelMessageContent[]; type LanguageModelInitialPrompts = | LanguageModelMessage[] | LanguageModelMessageShorthand[]; -export interface LanguageModelMessage { +interface LanguageModelMessage { role: LanguageModelMessageRole; content: LanguageModelMessageContent[]; } @@ -75,7 +73,7 @@ export interface LanguageModelMessageContent { type: LanguageModelMessageType; content: LanguageModelMessageContentValue; } -export type LanguageModelMessageRole = 'system' | 'user' | 'assistant'; +type LanguageModelMessageRole = 'system' | 'user' | 'assistant'; type LanguageModelMessageType = 'text' | 'image' | 'audio'; type LanguageModelMessageContentValue = | ImageBitmapSource From 2a503a92d2a8a8221ab242687c7f8e77fc6ca35d Mon Sep 17 00:00:00 2001 From: gsiddh <92327772+gsiddh@users.noreply.github.com> Date: Tue, 22 Apr 2025 08:49:34 -0700 Subject: [PATCH 12/14] Add image inference support (#8954) * Adding image based input for inference * adding image as input to create language model object --- .../src/methods/chrome-adapter.test.ts | 175 +++++++++++++++--- .../vertexai/src/methods/chrome-adapter.ts | 42 +++-- 2 files changed, 180 insertions(+), 37 deletions(-) diff --git a/packages/vertexai/src/methods/chrome-adapter.test.ts b/packages/vertexai/src/methods/chrome-adapter.test.ts index 28a50ceb5eb..cc371e71caa 100644 --- a/packages/vertexai/src/methods/chrome-adapter.test.ts +++ b/packages/vertexai/src/methods/chrome-adapter.test.ts @@ -25,7 +25,7 @@ import { LanguageModelCreateOptions, LanguageModelMessageContent } from '../types/language-model'; -import { stub } from 'sinon'; +import { match, stub } from 'sinon'; import { GenerateContentRequest } from '../types'; use(sinonChai); @@ -106,22 +106,6 @@ describe('ChromeAdapter', () => { }) ).to.be.false; }); - it('returns false if request content has non-text part', async () => { - const adapter = new ChromeAdapter( - {} as LanguageModel, - 'prefer_on_device' - ); - expect( - await 
adapter.isAvailable({ - contents: [ - { - role: 'user', - parts: [{ inlineData: { mimeType: 'a', data: 'b' } }] - } - ] - }) - ).to.be.false; - }); it('returns false if request system instruction has function role', async () => { const adapter = new ChromeAdapter( {} as LanguageModel, @@ -189,18 +173,19 @@ describe('ChromeAdapter', () => { const createStub = stub(languageModelProvider, 'create').resolves( {} as LanguageModel ); - const onDeviceParams = {} as LanguageModelCreateOptions; const adapter = new ChromeAdapter( languageModelProvider, - 'prefer_on_device', - onDeviceParams + 'prefer_on_device' ); + const expectedOnDeviceParams = { + expectedInputs: [{ type: 'image' }] + } as LanguageModelCreateOptions; expect( await adapter.isAvailable({ contents: [{ role: 'user', parts: [{ text: 'hi' }] }] }) ).to.be.false; - expect(createStub).to.have.been.calledOnceWith(onDeviceParams); + expect(createStub).to.have.been.calledOnceWith(expectedOnDeviceParams); }); it('avoids redundant downloads', async () => { const languageModelProvider = { @@ -277,6 +262,10 @@ describe('ChromeAdapter', () => { const onDeviceParams = { systemPrompt: 'be yourself' } as LanguageModelCreateOptions; + const expectedOnDeviceParams = { + systemPrompt: 'be yourself', + expectedInputs: [{ type: 'image' }] + } as LanguageModelCreateOptions; const adapter = new ChromeAdapter( languageModelProvider, 'prefer_on_device', @@ -287,12 +276,78 @@ describe('ChromeAdapter', () => { } as GenerateContentRequest; const response = await adapter.generateContent(request); // Asserts initialization params are proxied. - expect(createStub).to.have.been.calledOnceWith(onDeviceParams); + expect(createStub).to.have.been.calledOnceWith(expectedOnDeviceParams); + // Asserts Vertex input type is mapped to Chrome type. + expect(promptStub).to.have.been.calledOnceWith([ + { + type: 'text', + content: request.contents[0].parts[0].text + } + ]); + // Asserts expected output. 
+ expect(await response.json()).to.deep.equal({ + candidates: [ + { + content: { + parts: [{ text: promptOutput }] + } + } + ] + }); + }); + it('generates content using image type input', async () => { + const languageModelProvider = { + create: () => Promise.resolve({}) + } as LanguageModel; + const languageModel = { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + prompt: (p: LanguageModelMessageContent[]) => Promise.resolve('') + } as LanguageModel; + const createStub = stub(languageModelProvider, 'create').resolves( + languageModel + ); + const promptOutput = 'hi'; + const promptStub = stub(languageModel, 'prompt').resolves(promptOutput); + const onDeviceParams = { + systemPrompt: 'be yourself' + } as LanguageModelCreateOptions; + const expectedOnDeviceParams = { + systemPrompt: 'be yourself', + expectedInputs: [{ type: 'image' }] + } as LanguageModelCreateOptions; + const adapter = new ChromeAdapter( + languageModelProvider, + 'prefer_on_device', + onDeviceParams + ); + const request = { + contents: [ + { + role: 'user', + parts: [ + { text: 'anything' }, + { + inlineData: { + data: sampleBase64EncodedImage, + mimeType: 'image/jpeg' + } + } + ] + } + ] + } as GenerateContentRequest; + const response = await adapter.generateContent(request); + // Asserts initialization params are proxied. + expect(createStub).to.have.been.calledOnceWith(expectedOnDeviceParams); // Asserts Vertex input type is mapped to Chrome type. expect(promptStub).to.have.been.calledOnceWith([ { type: 'text', content: request.contents[0].parts[0].text + }, + { + type: 'image', + content: match.instanceOf(ImageBitmap) } ]); // Asserts expected output. 
@@ -314,6 +369,10 @@ describe('ChromeAdapter', () => { const onDeviceParams = { systemPrompt: 'be yourself' } as LanguageModelCreateOptions; + const expectedOnDeviceParams = { + systemPrompt: 'be yourself', + expectedInputs: [{ type: 'image' }] + } as LanguageModelCreateOptions; // setting up stubs const languageModelProvider = { @@ -343,7 +402,7 @@ describe('ChromeAdapter', () => { } as GenerateContentRequest; const response = await adapter.countTokens(countTokenRequest); // Asserts initialization params are proxied. - expect(createStub).to.have.been.calledOnceWith(onDeviceParams); + expect(createStub).to.have.been.calledOnceWith(expectedOnDeviceParams); // Asserts Vertex input type is mapped to Chrome type. expect(measureInputUsageStub).to.have.been.calledOnceWith([ { @@ -377,6 +436,9 @@ describe('ChromeAdapter', () => { }) ); const onDeviceParams = {} as LanguageModelCreateOptions; + const expectedOnDeviceParams = { + expectedInputs: [{ type: 'image' }] + } as LanguageModelCreateOptions; const adapter = new ChromeAdapter( languageModelProvider, 'prefer_on_device', @@ -386,11 +448,72 @@ describe('ChromeAdapter', () => { contents: [{ role: 'user', parts: [{ text: 'anything' }] }] } as GenerateContentRequest; const response = await adapter.generateContentStream(request); - expect(createStub).to.have.been.calledOnceWith(onDeviceParams); + expect(createStub).to.have.been.calledOnceWith(expectedOnDeviceParams); + expect(promptStub).to.have.been.calledOnceWith([ + { + type: 'text', + content: request.contents[0].parts[0].text + } + ]); + const actual = await toStringArray(response.body!); + expect(actual).to.deep.equal([ + `data: {"candidates":[{"content":{"role":"model","parts":[{"text":["${part}"]}]}}]}\n\n` + ]); + }); + it('generates content stream with image input', async () => { + const languageModelProvider = { + create: () => Promise.resolve({}) + } as LanguageModel; + const languageModel = { + promptStreaming: _i => new ReadableStream() + } as LanguageModel; + 
const createStub = stub(languageModelProvider, 'create').resolves( + languageModel + ); + const part = 'hi'; + const promptStub = stub(languageModel, 'promptStreaming').returns( + new ReadableStream({ + start(controller) { + controller.enqueue([part]); + controller.close(); + } + }) + ); + const onDeviceParams = {} as LanguageModelCreateOptions; + const expectedOnDeviceParams = { + expectedInputs: [{ type: 'image' }] + } as LanguageModelCreateOptions; + const adapter = new ChromeAdapter( + languageModelProvider, + 'prefer_on_device', + onDeviceParams + ); + const request = { + contents: [ + { + role: 'user', + parts: [ + { text: 'anything' }, + { + inlineData: { + data: sampleBase64EncodedImage, + mimeType: 'image/jpeg' + } + } + ] + } + ] + } as GenerateContentRequest; + const response = await adapter.generateContentStream(request); + expect(createStub).to.have.been.calledOnceWith(expectedOnDeviceParams); expect(promptStub).to.have.been.calledOnceWith([ { type: 'text', content: request.contents[0].parts[0].text + }, + { + type: 'image', + content: match.instanceOf(ImageBitmap) } ]); const actual = await toStringArray(response.body!); @@ -400,3 +523,7 @@ describe('ChromeAdapter', () => { }); }); }); + +// TODO: Move to using image from test-utils. 
+const sampleBase64EncodedImage = + '/9j/4QDeRXhpZgAASUkqAAgAAAAGABIBAwABAAAAAQAAABoBBQABAAAAVgAAABsBBQABAAAAXgAAACgBAwABAAAAAgAAABMCAwABAAAAAQAAAGmHBAABAAAAZgAAAAAAAABIAAAAAQAAAEgAAAABAAAABwAAkAcABAAAADAyMTABkQcABAAAAAECAwCGkgcAFgAAAMAAAAAAoAcABAAAADAxMDABoAMAAQAAAP//AAACoAQAAQAAAMgAAAADoAQAAQAAACwBAAAAAAAAQVNDSUkAAABQaWNzdW0gSUQ6IDM5MP/bAEMACAYGBwYFCAcHBwkJCAoMFA0MCwsMGRITDxQdGh8eHRocHCAkLicgIiwjHBwoNyksMDE0NDQfJzk9ODI8LjM0Mv/bAEMBCQkJDAsMGA0NGDIhHCEyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMv/CABEIASwAyAMBIgACEQEDEQH/xAAbAAABBQEBAAAAAAAAAAAAAAAAAQIDBAUGB//EABgBAQEBAQEAAAAAAAAAAAAAAAABAgME/9oADAMBAAIQAxAAAAHfA7ZFFgBQAAUUBQFBFABSUBQBQBZQUiqC7wAoigooQKACgCigKIoAosIKSigABWBdZAUAUAUQUUUAFIBQAWAFAUVFABSKoLqAKAKAKJVt4BvrFLAqKooArHgoQAoKiqDyKKoaiqhSqhCqgLFKHKdBiZmbodX5n2MbWHkdZS2kWhUBQIVUBwgUucv8Oad7nUzey3vPO5q4UrlOEWjzT0vhssDpea9Gy03BsqooKhCgCgCgHIcd0fN5DnuWHseY0Ureh+ZelLIqFq+f+gQJ5f6V5r6pE4i2ioDhCFVAVWrCiBxvJdlzFzVc56GjFoy4/a8d2q2TmpN3V1OF2MWp1/NrL0hzinRnO5Sdwc+L0Jz5HQLzyy9AYQYmDrZfXkyxVs5m4yVt3F0/M7l1YotpQnScdumqsFSb0yElm4zf5hjvV56bOtteViXq3ecRMbJgG+L4tzGqNyTDJNqMx5rfSHGRdpAcidPqLyFbuBeWrdmyONg7TJTBTrqZg3b6GGzbSzILYW8uSuF2hPG9l6uFdbPQRxzU8M2Lc62fpUJZNGC5TXAseNuVc2abO0pSKUsjdI+OdNoTzYc3fIANzF1LVTalK9KU72e1coa1TOqe3naA8inKGZ0QV5ZGzSywKWVrSAUROTjuno8lSLQbFq5kNrXsYAvQu5xmW9y18l0tjmrFu8ZM66C0nLabEsPGrT3xOlnIyXjkzC8tSxh2zRbWlsVNZtY6a9SKq1ZCd0rLHS17SPlgUtvpvatrVetlYJJZRpNcOOfmRaEN+s3Vctl0qCWs+PLljs19iWw+RdZEcU1VBFVUR6Kr5a6rplEzvnH5krF9Y33LnNFkqWIynAqZ3Zno3U03xO1mVY1HrGDxgOREpURkjiMXDUXOlsVpjRIJ0RXhix3KbUuzn6DLla6nK1RwFAKKK+GNsuigXReXW6mpRS2yWu6Zgr64Rq90abqclllYVJiJxIrAkI1JXRvJZoJJqUcY1yzmrvLnMLJX1QngWQrF9hTW01IZmwlt1F5bWtMTPruLc+fYltSVo83SKpnX/8QALRAAAQQCAQMDBAIBBQAAAAAAAQACAwQREgUQExQgITAVIjEyI0AkJTM0QXD/2gAIAQEAAQUC/wDH5Z2wu/scrHmBjg+P0hzXf0pGCSPjpnwT2bDa0LOWe6dEgCW06yYIWwRf0uVrbNdf79Grg2ZeUrxkMsco+CFleP4uRuyQvPITOjdyLzS4yy+Znqts7dtcbSZOgAB8V6Yw1nlziCE39obclR8EzZ4YrUM7vRy2PLVBpbT+Plv+Nn0RPZU42jJpc9HIwOhtqk8yU/j5dxMq+1YbrVaH2eUd/lsDpJG516zRMnjLSHRt0i+
PlYss613Fli5OLBhOkwv1ShNG4PlDIqdzyunjd/l/k5NwFWu0dw/gMLlXhfFyHLD+SpGZbTq8GIR3Y7NCGKvRrd9fT5F4VgLxboXZ5ALXkgs8mFZt3I5vIvLzLYXnzL6lhfVYwvq9dfVqy5IEpzTG93618me0P9S5T96GPNQDWm+f8HifZuVlZWVlZXJnPILKysoytXsuUe0y27LHxzS92Y/ca72xzmWOW1cMcklSSKIMkbIzzYNrs8b6dO1HXYLsBaHAqS0yOTKyvLb37crZOQm5Bkcw5GFykuyqZ81iJ0mru9JgJ8bmHoGly1ds+KSNMikkXZsAduVo+5HKBwmW5mFzy5z70r43WJXEyuKz9ywjs8wzSQPdkuwUAcch/u9InavA0s2maqnMYpC1rmtjAV1zvHpVi1hiiQghz4cC8SsnUqxX0+svDrix9KgzLxeHHiiG/SX4+lyI8ZMFLVmgFz9nY2UELioNnqSRz5KEa/6AUpe0Miyrf8Dadnug6uQwOjgSyKye+WyIbAEgLuRoSxORwVLU2tTyOfJj2QlkY3ua8dGN0MhO2LmkK3bkgn7Ykjk4+KQ14BXj67YNkydqtE/VahagLVqwFo3f0PHlwe4NOSWRrh7agqxUEyZmGF9+IKG/G53Q7YPfaou9amEzV+wAI9BkY0k5PWtHOwy1d3V4zC38oKaq6WQfiw+FrIIqxXutiPRlfatWLVi0YvZTU4bDnVV4zkKpRrvUbS1F3tG4hbhbhbhS2WxtmmM0nHt0gysrZZWfR7rPXKysrZbFblblbruFZ990Nc7BCYpsxXdXcWy2WyysrPXuxrvMK7sa1ytF212120RqMZGFhY6BAoFArZZWVlZWfTC1zi+0c15y9+q1WgT4F33KOUl+0a7jMtfl2PTn4K+S0xPDoIe2srKyrE2vSGPuP7LF22/EEFq5dtybDlMAYMrZbLdOsgJ7t3KJj4xn4crK2QkKDgfTnpMThmNU1jXMbNogc/DlZWVno1+FsAvz6H5x0/KhZ7/GR0wgPd7tjD1x0f8Auoxs/wCHCwtemOuUx4ag8FZHV8bcqu33+LKysArt5WpWq1WOmShIQnSZBTBs4eyz1z8AKygvZaharC1RYsdQcESLcL8rJWVn0Z6gdG9MrKys9CAUWLtuWvUEhCRbDp7rZbLKCCygvx6s9AUCisBYRCPTKyUPQ0ooOKBK/8QAIhEAAwACAgIBBQAAAAAAAAAAAAEREBIgIQIwURMiMUBQ/9oACAEDAQE/Af5k9E9yWITC9S7RCCIQhCEGuyEcPFMTYrCYsxTrDYmVQTKhPouPJ9GyNj6iG7mEIRkZGPxZGR8aTofiRkZGM6OjY/OahNFp38lZWX5NkXxPtxuzZlNjZm5ubmxc01RqakIak4XhSl9NJxf6cJxvNCxCelMp/8QAIhEAAwACAgIBBQAAAAAAAAAAAAERECASMAIhIjFAQVBx/9oACAECAQE/Af1d6LumXZs5MTLhn51pR5WlKUulz5JLFLrR/XH8ITEIQhCCHld3IbRUesez2Px0jI8PERxIz5HyPZxRxWkIQmvI5FLil6Z137C9NJ2XFL0MhD//xAA2EAABAwEFBQcDBAEFAAAAAAABAAIRIQMQEjFBEyAiMlEEMDNSYXGRQIGhIzRCklAUQ1Nwcv/aAAgBAQAGPwL/AKfYHfyMfUttf+M1TXNyIpvHCQY+icw5OEI9ktdKBbR3sAmjZDZkxnW6TQI2HZK+a00CDG/Ri3Zm3mjonWNtGMZOTJgCdTCIaS8+ixOOCyCDLMU7sWVnQxJKaHEyMy2kqWyLSYxJwtHS5u/atiOK5z7USGmIQAHdktMONAsTnEn1WQKnojgjCdE21FAUW2b5I3aHStzZ1r3jP/d5uDbV1XyWgKzrAy3Xn+L+IXWTj5e8s2aRN2SOhVm1woXLDo1oQazmOSGLOK7hY9shYdckxvQDvGWvQxuMeBiIOSbNjs36kpjvKZXihSHhOfnhE0T
uDDHrdaECGMdLu9w6khYncrBiKlBozJhWTHiHAqyd6Qms+VJsmfCwhh9k97C8EDqn/quZHlVO2Wi4e2OVO2KnamrxbIr/AGimi0OA9GL9qFXsZVeyPVezWirY2qq20H2Wbv6qy+E5hzFEFZgecKwI1Vh91bOGmV1B6K1Vr9t9vsN3mCqAm7N7SOjdE0NqQZTrTrc1ztCrJ4PC3VWDcQnF+FbvLhzfhYmmicMfKuF04skQ+eI6LFtBms0xhNXH4v2MVWIHhELCDiGvoqHWE6rWwadUHTJb5dQuE16ojaEjOt0OEX0ErDBk6IF7YnqjgYTGcLw3wpwOj2WqqFTNE4qnOViJWCaR0VXnKKKr/wAKTfJMlTEjVsolZXNoAIzRuBmEHWwaGnJzRRbTZ8PnCLZaGn0WS5KrCLM1WK0xD0OS8Jhn0RH+nZ/VeC1eC1eEFyflYHWsTkAuZ/yoZaf2Xij7hTtW/YLnb+Vzs+VLsvRybaEV6SjhENu2kNwN8yfbFoMcrf4p1o9pwikTQIl1nXQkXVXCGhYiYJ8rl+4tGTlAR5nR/IthQVS4j4WztHEnQlgVLX5YtFUwvFHyqWjflcy2r3WZZ5SjifiAyXpdha8hvRCGzwprA0kzWEABT3XCQPcKpCwsIy6IY/xRTjeD7ysAM+u5ov07LaHoVithx9JyvoB8LIfCyU7Ie+60sPG3MXHEeEZIVr7qoaUDQP6obR0x0CptPhBhDhN9Ci9xDoya0IutHusmt/iFBIXDakey8QlZ31c0fdTuY2wAeqxC0OI5yoxk+l+MWpb6XfrAV0WOyAprcOAn23ch8LLcxPxfK4XfKzCqVkhxqhquMrNZrNTzegWM0U6uP00rJThF2ar3WfdSPo5mAFDcuqwu3JYYN3EQAuZRKw4e+e3QhYYWI825hGt0aLJZd5kslxKBu5IuN2hnvc+4gIzdzQVhNfX6CqpuZX0VR39d83D6ckG7F/kafT0/xf8A/8QAKhABAAIBAwMDBAIDAQAAAAAAAQARITFBURBhcSCBkTChscHR8EBQ4fH/2gAIAQEAAT8h/wAiv8iof60/24fSvm0naH+R2aUdppQR8PVerRTWafXUA+lrvlRRsJt2f+xcK5o6rMHN0LZb9Fagaq0EyEPYezzAGwavL67l+jb1sex1ucH2lNKQvo1+4DXUq1qO8JQuOPmZPNWNPbllNUa93l+m+Nx3niXqZkfLEtIvwwS75Bt1qXL9H43mjIKjs5hxLIxhtWEwAKAMH07uBuNpYwtVXCGs7xLQcmZjdZmpBJoLnaFJ1hXpOcFSE2YaxxFP5/qcz+iXToFmTpK7yt+RC1GWVyrPaHXZjILVX8kNe0A+l+w+psg/PfTViLG0CD8QCO8wRgYDiC7aYcs8evd6Brtt3jBCFweZUJVb7fUI7W74YEcS8LFVhJzjk4dy8SodQh3BdmyEXRzd7TFspRGYByYeUzF14jPPEuXLly5cuX1voJWze2sQ9Q9zg+amaprCQ2IEoCSuY63Ir4MUahd+BmIVIZuUJECnsXWXLxBDX26+XmU6Xz/7B6iXK05n8hGGqPmbfyP/ACbwnQ2SxsPmU6p4Z+gVlGn8XL6L7f8AJtJ7Q/KUi17sMo5YxypaCW4JWPpGGnmOw2v8iFmYsfKLYjkdZeDFDDg0nxh+YLPL+3rAovb+8vPUvzA65saxNfuiJo4RLXF13F2lmFXuvaKkPabIc4ZYEFrumMtNnH9E5U7Xd/MEFXvNB7FuMe0c02mB3mVhstCBhU0/pNAtCaNTXRMJW6svWpfUs6vbSB84N+NZSDuiCsttdle72mPNFBy4gHLLvAbbzAzStbf3M1+rqfeaZZioic9GqZcBKxw6mYehtWyxgJ6A0l8UrYI2w+TpmbVfCc8e01A7G4Am8NmW9XzxHqqqOF68w02AWwwaR0UXXYymRduZhOHzFc3L8ydyHa660DiXiJbc7qbQ68TJeQN5lUp3IxjxlldJXAGhvzGQDjQ
la/mO1nlbX8SpaWtplxI3wfuMXhYM1gea6UwzwhqIoFb6IX3dfboerh4s/c7Ku7jYbcZBKfAP4hEIvg/xCqWcYJrnusF0L2ilrPtY/UeCdwsCgzQq1kzPaNZXE8vB0QuFCtP2R/SzWKmP5lZq66aINj8zdH3JY2L3b/EUWNVZT7SgKpYEv6iCaNkipsd5QBFfMK7/ADLhKuriEWio7PmWrwcAzdF4xALHlbKs4Z1wsK+kLuRnGtlWvBMmobbEsBvLa4Ra2bGWPmIdgfeWyhbQxMealG6ViFVJbmACj/e8MOBdG1M5KoWzlPfQP2TdqXYgVMbhBCOIfJjqCjWwEDunsDxEaxiLGc+YGofiC6/tph0fEbq08FzOOphG5asjVVFSkYRPapngwWxcu0vBdTFabfWF2AxjqRcMdpCHIuhjHRaq1shjR+YLyRaBfeDFw3B95hI3XGcc98n5iGQXeCM9ykB5sGtyXMwjvSacC9j0UgA0epLcxoY1vwIuGsVEyJgECgfuUxBo3SqX0bqmOle5Fwz9XSSp7y5TclPW+DjyysaQ2D7yoIZQUVASNWtGaMDyJZG1bMueKBkF4emONKdQe8fmlpZKmGwDaCjdRVzyl+r5RZctlwODPeW5l5eWnej0a07kyste7Cuz4iOp+IbRXiF0fvmcLfaBgGB59RCuYRi1grWpmq3zACxuMsW4ipmHSFCF5eEAxPoFO6HfPOX6g+h0Hr241UgcciUSu9EJR2iYsUkpMCjTWLHiCiA7Cd0TDl5ljaUzMJfQMGEBfQvMZ3mqnuQnZf4ej09wdMswMrA4BbDfiY6VK6VAgQ6e2d5Ei4qWqn5s+itCbuWLqhlWkq2LKEXLOty5cvqlICFMPQZcHouVl00QXXQwuRGdtTZDAmnruX12bcwwxnnJGlohhFSuj0Ybtvo6KU/mKNxw06XL6X6UuLMxjxEbIUS+eOldNT7zpWodT1r8S0So9Fsy1mBrWLawbfpjeawPRVbNOteu6hB2RJpKbpkjKiWOgWj0pKSXuUpKCg6bJfRcuX1GX0CxLzOdyKnhMtou0sa9L5JmoXcg2sE0PQOcoy+lstCp7dIO81QWXhJAJh0Zhme2lG0EaxxLeickGmHRljeW3gYGMiJWUqDT0rLS24nU3GkrAgLhBQ5orOopHhhHWKMs/9oADAMBAAIAAwAAABASIMVBgAVIggAJsGy6fNBiyj4Y5ptsnyTbFtvCz9pNNPGuqMCNo42YQIEExL6CRYMEGT8YCBzUGdVEHKQHraFgCRaW/wDNpnycuGNdceiyLtY4mcgOiOu29EEGuHlAnRrvBwEb0uqOJE43dRwqzkz2egbGwwUOslkwzPIcsSwSNhRUkWEw1v62L+JMcNPr2AmjywACL2YgqfCuq0/Cz+/jqnaGEcefx1OE4WV4cia8oyMQ8U8lMsIgsWO//8QAHREAAwACAwEBAAAAAAAAAAAAAAERECEgMVFBMP/aAAgBAwEBPxBc1+a/BIhCcITMI8QhCYQhCEJkvMQmYQhMwSNeZGhNUhCEIQb2JLs6VO48HoK5+AEVawVlRxOosomXwd8GnZFXhBRoo6jcWhEUOTSFpEsbUKcC6hquh+Q9qiTHo2Gy+i7hlYQVKEyMkG6xMadEsQVNWsKSdaxKa3svsSIaTUmSLsaJEyxoR7dxN2w294KG1dcCJhIQvQkXwVG3IpKLNtFFEf038E3ME6JsbQ4LKEhtzEIQgmkJBlpkEt46D4xkZcREF0PMJiix8T5k1yH+A//EAB4RAAMBAQADAQEBAAAAAAAAAAABERAhIDFBMFFh/9oACAECAQE/EPwf5PaPLlKXwo8u0pSlHxtGUpcdGmMo/RWlC6rOhZS5zhwLrp0UmC+CpFGXTp0aFzo0Khvgvd8QpR+8Uo8UY3hhO7WUKvQfs9qhB/Q1cMLofRRZwoyLzYIjmNwtyoqx5BNoX9YkbbejnwfUEgxiqXWPwCf4cfBQoKFzOCBKesbMOHC
LwvBFnCFFE4bIRBUylKUqIyEEGxKimUpcjwmijeLKUuVFHlekUospdpk/Fii0nkmn/8QAJhABAAICAgICAgIDAQAAAAAAAQARITFBURBhcYGRobHBINHw4f/aAAgBAQABPxDweDX+J4P8jfk14NeVQJUNf4G/J4NeKleKh4JQyvDDwHipXivFQJUJUrxUrxUDuVK8ceArxUJUqVA8HioeK8VAzKglSoVUqVDLKhiV4rzUCoFwxKlSpXgPBAuVK8VKrwF+K8VApm5UCV4rxmVCVA81KlngPAY8V4qV1L8DfCB7N8RCCVTnDfgMeK8G5UJXgPJhh5NeefBszFrbCQytzUeUao/D74+vBr/AgAyf4TDfk8BC0HvMPJrzz5Du/sDX4afqAmGh09Z6tZ8y6HhnL0DxVZuAzNHW4FtX6iIo7J/LlggsaQei6lY9npH/AFNo2ptfvweTUuoeUhnWfias6ur9zmvJvwbOtJ6ixUpjK35UfuXT0sbc6a5cGnnUL5mcCXrzLchY3eC3HuH3Uh0/D9mofTOTtN9iw35PBr/Ac8U7vqA+qD5uBejEvV1kHSBKE5R22G1rFxXpUFJYPmYeA58heEtci8c45jURYWjAr6YsPtTBr6p1QtXvZiUhnAA9EqG/BL8GvF+HPAhZtt/Ep6IEFjWWXZEyZxhjcAsIVY6kJuM7G4jJYFaxpL6xBJXdgs7L3DZCXPuskrndJk1KfdVNat1CRLa/LF/QQxLhuX4PA/4VRxeHLBSZcWf99S27qvcugnIGo2dXu2sS82b2g/GU/MunLN0XKR9RXnZipcJeTeMnCR4FO+1/In8VEYLeinvEoIwVXoGXnxcJcGpfi/Fy21LB7I/QfuXRjHXqK8gK5zKKcge5qpOkLtH81MXGMwG1V9/qBRMNPJuMY1SJ6Zg5lwzDEepTJTCOyvUSXhBnJM/khigpQ1Qv9+L8DDEuGZcuXLmJy595j8JEMc8nuC1NlOYZQwYgoYo0vrHxDJYqMeAChgzKA1gouBzr1iKCjyip+TcPydMB03LYrV5B7uOogpwsP/EaDsTkPzzK6RwxgYYzbLC2ZleUPuA7/crA3mse/AtMIMvwuKgIR/JSndEl3GvmUJdIWrx7blVdY7bq36i1x4YU2iJHJpkW20V/ZNdWx0Fv1REywUgayt8QlCxGmUPVal73duXYUnWY+VQ5Vkvp1Ag0hWzxDsCsXKtreYa0/wDbifph/wDkpH0qKek5slT+CIaofwlXT1a/9MP+GH5h/wB0PqaXb0oftGVjP1D/ALmeGP0e9zIIYbq2kjuNCnKUn9MAvw3aQZgIXxSv8XKN2Iv0f+yWSW7IOyCu8DX+CATBIHSMWMyI3ofUAs5L8mJc6D+IMN6h7ePz/cKYvEpSSoVxhPc7rmPMHW38zcW1eWqOWAiW1MVH4jixHSNPq63CEMEwbVAtddYleJbjRl+6qUt1UOMD8x6hdbNH3OdTEKNn3uYnWIotw22VL6i1l282Y3BCipGSWhRzahznsOD76iAbC4lVV25rqG3MRWFkeviCur66Mct/MICcbEf7V7ghVYEpzTpqFMewB7H7lg2lxHBUByqDApdpbLOHlsg7m7CgEPbvqc3VboZs7UcmYEolD8gcGV/UE4ubQVrDspUiXl23DrBwRa6lX2IrB2HTqLvOkKi3pemJetOKgvvC7GOIgruagHj22wp4akoviWsDVT8BmYYyWD9LnBBXAfoYpCBtFdrgibPAo/mGxbGKaEFBQIhVs1BrbVCoYrPUGI40OBqpS3BgF9lwUjdg5be4fSpbgAbN6lmQ2Jw5hzC5q1qIuyH3/uYsKtqcFEDqLQa8BadkDjGVt7gxY52EBmfsodOLYW6TiLZmtcnpllt3zKfRULQeUNkDIQVQ9Ff5lSnC/dWRunxDrAWE/T/CKLUlTl81iG04NeTdNFhBjiqVjdUX+Suos14DB3m7/UOlfVaPshiMBuGIXw1mWaer/wCkSLT+T/2Jf93
6ilV+I/7iREraYdFtsuA2+RGbJMKx8lJYIdJ/YV/UCVpV0n+iYILiy/qU5FqApirNIF6v1dxZbfwGYPzAryVXA85iHAPqGrsbZbeqMsKUJysHNv7I/FtkKAdFZwOIWOYw1Zsbz+IgC2um/lhhRL7yfqGKZ7xXaBmJzVNxbsY+KgZZbSfOFX3AboByDpRcx0HPYk/gIWAGjp9wJXC+oGmdIVbhE/uPyjmUfUb9WRDCBz+3CRAtrtSX6iStHACJ00uQJG30oN/zKAObBH5ghoDQbNAZh0hYGwesRpxTYNn3M8XUvGTdAbhRDqWQ5RfxLD8hS2NZ0IWX0ypT1Yqgdo3KBm0HyWMsIkDDQv7QutMrDgjS9trKAWqfiVhQ0OEdVHLE4pVKutai4IfbcRaHwVMBT9kIKi7Mv43KuOoPkbgk66BXXANRgEnuq/qUdpdmQ/1HgPoCBsd/B+poNfRSMQzT7Vxof3CgoFBxqV1DBEmURG919Ra5zFyNa+O4EC9qA4O+YLAIWyXNPMVlScBr5qcc8llH2wMABLUvYO/cGGRtbVwVnqYQBQ1/lg49ExPtDEHJvqC8nyxGE4ZV9wS4xFo6tbFUaFKj1/b+ojAGFMH1RhzbxQv7shIe6Av4JyvmEsVZAvISkembc1pl36c0Hmqz+5VygUUjd0R6OEhZTwJxHTZzQpPUpWRUKrftCMsCANFcymG0C8uqmp7kBXsgC3pZW4zFwW+kJkYmEfZbK8MpBpD8za0H5LYpgE5HmLL4S6a/E4AHRiLberLAAIU3doNi6JaY16Kl3gMYQQpHqXCTGK7iiHAEfctwAMl1ACDZGZIjAHhP9gmxYd0uZuDgbf8AyJllcAPVzMwCAqjBDDZgm385nymeL8C93FMbMMoyZIXZLu/zBTUZr2mXdxLcTNsaNvzO1Ms51/cA1T5ifvUIfUIUCO6GYMBDWH8SyIsutf4gQfGEPKHVDNpOYIr0gO7gJRge4B5I+k+5R4RBU1OiEBXdSdBaaYgwASymJ0xOmNu0DxLy8HMxgR5IdcC4IhiA9koep6SYdwzbCrCJ8qWgo3cHRiW6i1t8uplil/Gm+EDlhl7+IQriMAIlZgIkN1wwlhiFNqmbEbag5Z+WVoNtRWRiYR/HxADMInphBTljsbtmU1Z/gbzMPSuJWSeADDBlpK9R844ZlatMdyuLdW9S1tSrb3KFEVL9Eq0s0bgUsaYAOAPipUv1LmagX4Lwxu4kjlTQJqPVKbt6jpQ8BuZKUtrtcE6f3BHMwzcvFNF7iaBOiwmzwsOjqWBytSlBIVYSImoGtQTiAMqnDiEA6geoV4hhglzidqIWLEpFPq4I5H7lBiHJntZbuDhMI21AlSVV7uN2K5gwnXtqV7OxsqN3aLINwxATklvqX8RQiHuNdXFDzHOdDEsiibDDMuKdysqyYxKoqwgiWhZDUs7auJaGZbGLNcNRmwMZ4mIAqoKcwvLy3uWlstiyyDpAe40mHDcNKMM4mrBo9Rql+0o0V4q6xLhQY9w1j6eBRspuziNNtwcwblPH35CF9ZnqSnZHWZbiUjAm7j7cIfkQo4s4nLrTcUFojCAm0WJlBumAvA0YCENztcMQS5Y+BCDbCzczZgiXYl6wgbC/MM1MTBZNUS1kgJOBItSqTRheZaluO2c2/Ex/A6gOYM4Z8LlvH4wctYPgKMrrNz0kaSFfBcQMbTjNkVebSsAZEYVpqUXFUIMTOEVEzSZaSS9QXSoEwwdZSWPNSnWYcxGiy1hd7QEtxE6VC8oBhFOZbOXuCXgQz1JRZhEsa8GAimGoqB4BcGhixA8DEQc3Fc1LW7gsweg3Lo024ah5Q0wDmHMZ3IicQl3RmGShHATpwWJEjhZUcytCWLOYRDCktgtnuAFhmYO5vRP/2Q=='; diff --git a/packages/vertexai/src/methods/chrome-adapter.ts 
b/packages/vertexai/src/methods/chrome-adapter.ts index 2d18311f4e0..33a0cf03709 100644 --- a/packages/vertexai/src/methods/chrome-adapter.ts +++ b/packages/vertexai/src/methods/chrome-adapter.ts @@ -98,8 +98,8 @@ export class ChromeAdapter { ); // TODO: support multiple content objects when Chrome supports // sequence - const contents = request.contents[0].parts.map( - ChromeAdapter.toLanguageModelMessageContent + const contents = await Promise.all( + request.contents[0].parts.map(ChromeAdapter.toLanguageModelMessageContent) ); const text = await session.prompt(contents); return ChromeAdapter.toResponse(text); @@ -122,8 +122,8 @@ export class ChromeAdapter { ); // TODO: support multiple content objects when Chrome supports // sequence - const contents = request.contents[0].parts.map( - ChromeAdapter.toLanguageModelMessageContent + const contents = await Promise.all( + request.contents[0].parts.map(ChromeAdapter.toLanguageModelMessageContent) ); const stream = await session.promptStreaming(contents); return ChromeAdapter.toStreamResponse(stream); @@ -137,8 +137,8 @@ export class ChromeAdapter { ); // TODO: support multiple content objects when Chrome supports // sequence - const contents = request.contents[0].parts.map( - ChromeAdapter.toLanguageModelMessageContent + const contents = await Promise.all( + request.contents[0].parts.map(ChromeAdapter.toLanguageModelMessageContent) ); const tokenCount = await session.measureInputUsage(contents); return { @@ -162,10 +162,6 @@ export class ChromeAdapter { if (content.role === 'function') { return false; } - - if (!content.parts[0].text) { - return false; - } } return true; @@ -185,8 +181,10 @@ export class ChromeAdapter { return; } this.isDownloading = true; + const options = this.onDeviceParams || {}; + ChromeAdapter.addImageTypeAsExpectedInput(options); this.downloadPromise = this.languageModelProvider - ?.create(this.onDeviceParams) + ?.create(options) .then(() => { this.isDownloading = false; }); @@ -195,14 +193,24 
@@ export class ChromeAdapter { /** * Converts a Vertex Part object to a Chrome LanguageModelMessageContent object. */ - private static toLanguageModelMessageContent( + private static async toLanguageModelMessageContent( part: Part - ): LanguageModelMessageContent { + ): Promise { if (part.text) { return { type: 'text', content: part.text }; + } else if (part.inlineData) { + const formattedImageContent = await fetch( + `data:${part.inlineData.mimeType};base64,${part.inlineData.data}` + ); + const imageBlob = await formattedImageContent.blob(); + const imageBitmap = await createImageBitmap(imageBlob); + return { + type: 'image', + content: imageBitmap + }; } // Assumes contents have been verified to contain only a single TextPart. // TODO: support other input types @@ -224,6 +232,7 @@ export class ChromeAdapter { options: LanguageModelCreateOptions ): Promise { // TODO: could we use this.onDeviceParams instead of passing in options? + ChromeAdapter.addImageTypeAsExpectedInput(options); const newSession = await this.languageModelProvider!.create(options); if (this.oldSession) { this.oldSession.destroy(); @@ -233,6 +242,13 @@ export class ChromeAdapter { return newSession; } + private static addImageTypeAsExpectedInput( + options: LanguageModelCreateOptions + ): void { + options.expectedInputs = options.expectedInputs || []; + options.expectedInputs.push({ type: 'image' }); + } + /** * Formats string returned by Chrome as a {@link Response} returned by Vertex. 
*/ From 5fa83b371bd6e7f387e12172d0461b2b2088c79c Mon Sep 17 00:00:00 2001 From: gsiddh <92327772+gsiddh@users.noreply.github.com> Date: Tue, 22 Apr 2025 14:14:05 -0700 Subject: [PATCH 13/14] disable count tokens api for on-device inference (#8962) --- .../src/methods/chrome-adapter.test.ts | 49 +++++++------------ .../vertexai/src/methods/chrome-adapter.ts | 24 +++------ 2 files changed, 24 insertions(+), 49 deletions(-) diff --git a/packages/vertexai/src/methods/chrome-adapter.test.ts b/packages/vertexai/src/methods/chrome-adapter.test.ts index cc371e71caa..1f96019e177 100644 --- a/packages/vertexai/src/methods/chrome-adapter.test.ts +++ b/packages/vertexai/src/methods/chrome-adapter.test.ts @@ -15,6 +15,7 @@ * limitations under the License. */ +import { VertexAIError } from '../errors'; import { expect, use } from 'chai'; import sinonChai from 'sinon-chai'; import chaiAsPromised from 'chai-as-promised'; @@ -26,7 +27,7 @@ import { LanguageModelMessageContent } from '../types/language-model'; import { match, stub } from 'sinon'; -import { GenerateContentRequest } from '../types'; +import { GenerateContentRequest, VertexAIErrorCode } from '../types'; use(sinonChai); use(chaiAsPromised); @@ -363,17 +364,8 @@ describe('ChromeAdapter', () => { }); }); describe('countTokens', () => { - it('counts tokens from a singular input', async () => { + it('counts tokens is not yet available', async () => { const inputText = 'first'; - const expectedCount = 10; - const onDeviceParams = { - systemPrompt: 'be yourself' - } as LanguageModelCreateOptions; - const expectedOnDeviceParams = { - systemPrompt: 'be yourself', - expectedInputs: [{ type: 'image' }] - } as LanguageModelCreateOptions; - // setting up stubs const languageModelProvider = { create: () => Promise.resolve({}) @@ -385,34 +377,27 @@ describe('ChromeAdapter', () => { languageModel ); - // overrides impl with stub method - const measureInputUsageStub = stub( - languageModel, - 'measureInputUsage' - 
).resolves(expectedCount); - const adapter = new ChromeAdapter( languageModelProvider, - 'prefer_on_device', - onDeviceParams + 'prefer_on_device' ); const countTokenRequest = { contents: [{ role: 'user', parts: [{ text: inputText }] }] } as GenerateContentRequest; - const response = await adapter.countTokens(countTokenRequest); - // Asserts initialization params are proxied. - expect(createStub).to.have.been.calledOnceWith(expectedOnDeviceParams); - // Asserts Vertex input type is mapped to Chrome type. - expect(measureInputUsageStub).to.have.been.calledOnceWith([ - { - type: 'text', - content: inputText - } - ]); - expect(await response.json()).to.deep.equal({ - totalTokens: expectedCount - }); + + try { + await adapter.countTokens(countTokenRequest); + } catch (e) { + // the call to countToken should be rejected with Error + expect((e as VertexAIError).code).to.equal( + VertexAIErrorCode.REQUEST_ERROR + ); + expect((e as VertexAIError).message).includes('not yet available'); + } + + // Asserts that no language model was initialized + expect(createStub).not.called; }); }); describe('generateContentStream', () => { diff --git a/packages/vertexai/src/methods/chrome-adapter.ts b/packages/vertexai/src/methods/chrome-adapter.ts index 33a0cf03709..3f641a47ee4 100644 --- a/packages/vertexai/src/methods/chrome-adapter.ts +++ b/packages/vertexai/src/methods/chrome-adapter.ts @@ -15,11 +15,13 @@ * limitations under the License. */ +import { VertexAIError } from '../errors'; import { CountTokensRequest, GenerateContentRequest, InferenceMode, - Part + Part, + VertexAIErrorCode } from '../types'; import { Availability, @@ -129,23 +131,11 @@ export class ChromeAdapter { return ChromeAdapter.toStreamResponse(stream); } - async countTokens(request: CountTokensRequest): Promise { - // TODO: Check if the request contains an image, and if so, throw. - const session = await this.createSession( - // TODO: normalize on-device params during construction. 
- this.onDeviceParams || {} - ); - // TODO: support multiple content objects when Chrome supports - // sequence - const contents = await Promise.all( - request.contents[0].parts.map(ChromeAdapter.toLanguageModelMessageContent) + async countTokens(_request: CountTokensRequest): Promise { + throw new VertexAIError( + VertexAIErrorCode.REQUEST_ERROR, + 'Count Tokens is not yet available for on-device model.' ); - const tokenCount = await session.measureInputUsage(contents); - return { - json: async () => ({ - totalTokens: tokenCount - }) - } as Response; } /** From e8397430805348a51030b968ece3598cb94d50b4 Mon Sep 17 00:00:00 2001 From: Erik Eldridge Date: Tue, 22 Apr 2025 18:24:33 -0700 Subject: [PATCH 14/14] VinF Hybrid Inference: throw if only_on_device and model is unavailable (#8965) --- e2e/sample-apps/modular.js | 2 +- .../src/methods/chrome-adapter.test.ts | 81 +++++-------------- .../vertexai/src/methods/chrome-adapter.ts | 39 ++++----- 3 files changed, 43 insertions(+), 79 deletions(-) diff --git a/e2e/sample-apps/modular.js b/e2e/sample-apps/modular.js index f8b2295768a..aeebe19a4b1 100644 --- a/e2e/sample-apps/modular.js +++ b/e2e/sample-apps/modular.js @@ -314,7 +314,7 @@ async function callVertexAI(app) { console.log('[VERTEXAI] start'); const vertexAI = getVertexAI(app); const model = getGenerativeModel(vertexAI, { - mode: 'prefer_on_device' + mode: 'only_on_device' }); const singleResult = await model.generateContent([ { text: 'describe the following:' }, diff --git a/packages/vertexai/src/methods/chrome-adapter.test.ts b/packages/vertexai/src/methods/chrome-adapter.test.ts index 1f96019e177..7c671d63f30 100644 --- a/packages/vertexai/src/methods/chrome-adapter.test.ts +++ b/packages/vertexai/src/methods/chrome-adapter.test.ts @@ -61,19 +61,8 @@ describe('ChromeAdapter', () => { }) ).to.be.false; }); - it('returns false if AI API is undefined', async () => { - const adapter = new ChromeAdapter(undefined, 'prefer_on_device'); - expect( - await 
adapter.isAvailable({ - contents: [] - }) - ).to.be.false; - }); it('returns false if LanguageModel API is undefined', async () => { - const adapter = new ChromeAdapter( - {} as LanguageModel, - 'prefer_on_device' - ); + const adapter = new ChromeAdapter(undefined, 'prefer_on_device'); expect( await adapter.isAvailable({ contents: [] @@ -82,7 +71,9 @@ describe('ChromeAdapter', () => { }); it('returns false if request contents empty', async () => { const adapter = new ChromeAdapter( - {} as LanguageModel, + { + availability: async () => Availability.available + } as LanguageModel, 'prefer_on_device' ); expect( @@ -93,7 +84,9 @@ describe('ChromeAdapter', () => { }); it('returns false if request content has function role', async () => { const adapter = new ChromeAdapter( - {} as LanguageModel, + { + availability: async () => Availability.available + } as LanguageModel, 'prefer_on_device' ); expect( @@ -107,51 +100,6 @@ describe('ChromeAdapter', () => { }) ).to.be.false; }); - it('returns false if request system instruction has function role', async () => { - const adapter = new ChromeAdapter( - {} as LanguageModel, - 'prefer_on_device' - ); - expect( - await adapter.isAvailable({ - contents: [], - systemInstruction: { - role: 'function', - parts: [] - } - }) - ).to.be.false; - }); - it('returns false if request system instruction has multiple parts', async () => { - const adapter = new ChromeAdapter( - {} as LanguageModel, - 'prefer_on_device' - ); - expect( - await adapter.isAvailable({ - contents: [], - systemInstruction: { - role: 'function', - parts: [{ text: 'a' }, { text: 'b' }] - } - }) - ).to.be.false; - }); - it('returns false if request system instruction has non-text part', async () => { - const adapter = new ChromeAdapter( - {} as LanguageModel, - 'prefer_on_device' - ); - expect( - await adapter.isAvailable({ - contents: [], - systemInstruction: { - role: 'function', - parts: [{ inlineData: { mimeType: 'a', data: 'b' } }] - } - }) - ).to.be.false; - }); 
it('returns true if model is readily available', async () => { const languageModelProvider = { availability: () => Promise.resolve(Availability.available) @@ -246,7 +194,20 @@ describe('ChromeAdapter', () => { ).to.be.false; }); }); - describe('generateContentOnDevice', () => { + describe('generateContent', () => { + it('throws if Chrome API is undefined', async () => { + const adapter = new ChromeAdapter(undefined, 'only_on_device'); + await expect( + adapter.generateContent({ + contents: [] + }) + ) + .to.eventually.be.rejectedWith( + VertexAIError, + 'Chrome AI requested for unsupported browser version.' + ) + .and.have.property('code', VertexAIErrorCode.REQUEST_ERROR); + }); it('generates content', async () => { const languageModelProvider = { create: () => Promise.resolve({}) diff --git a/packages/vertexai/src/methods/chrome-adapter.ts b/packages/vertexai/src/methods/chrome-adapter.ts index 3f641a47ee4..2490508889f 100644 --- a/packages/vertexai/src/methods/chrome-adapter.ts +++ b/packages/vertexai/src/methods/chrome-adapter.ts @@ -60,29 +60,26 @@ export class ChromeAdapter { * separation of concerns.

*/ async isAvailable(request: GenerateContentRequest): Promise { - // Returns false if we should only use in-cloud inference. if (this.mode === 'only_in_cloud') { return false; } - // Returns false if the on-device inference API is undefined.; - if (!this.languageModelProvider) { - return false; - } - // Returns false if the request can't be run on-device. - if (!ChromeAdapter.isOnDeviceRequest(request)) { - return false; + + const availability = await this.languageModelProvider?.availability(); + + // Triggers async model download so it'll be available next time. + if (availability === Availability.downloadable) { + this.download(); } - const availability = await this.languageModelProvider.availability(); - switch (availability) { - case Availability.available: - // Returns true only if a model is immediately available. - return true; - case Availability.downloadable: - // Triggers async download if model is downloadable. - this.download(); - default: - return false; + + if (this.mode === 'only_on_device') { + return true; } + + // Applies prefer_on_device logic. + return ( + availability === Availability.available && + ChromeAdapter.isOnDeviceRequest(request) + ); } /** @@ -221,6 +218,12 @@ export class ChromeAdapter { // TODO: define a default value, since these are optional. options: LanguageModelCreateOptions ): Promise { + if (!this.languageModelProvider) { + throw new VertexAIError( + VertexAIErrorCode.REQUEST_ERROR, + 'Chrome AI requested for unsupported browser version.' + ); + } // TODO: could we use this.onDeviceParams instead of passing in options? ChromeAdapter.addImageTypeAsExpectedInput(options); const newSession = await this.languageModelProvider!.create(options);