Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions examples/multimodal/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
FISHJAM_ID=""
FISHJAM_TOKEN=""
GEMINI_API_KEY=""
10 changes: 10 additions & 0 deletions examples/multimodal/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,13 @@ When the server is running, you can obtain peer tokens by going to <http://local

When you connect peers with audio and video, the agent will periodically capture video frames and send them along with audio to Gemini for multimodal understanding.
You can connect peers with the [fishjam minimal-react example](https://github.com/fishjam-cloud/web-client-sdk/tree/main/examples/react-client).

## Troubleshooting Gemini keys

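The key is validated at startup via `createClientAndValidate`, so an invalid,
unauthorized, or region-blocked key makes the server throw and exit immediately.
The same error is thrown when the validation request itself fails (e.g. no
network connectivity); inspect the error's `cause` to tell the two cases apart.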

If startup succeeds but the agent stays silent, the key may have been rejected
only by the Live native-audio model (a model-specific case the startup check
can't catch). Look in the logs for the close code reported by the
`onerror`/`onclose` callbacks (e.g. 1008 "your API key was reported as leaked",
or 1011) and try a freshly rotated key.
5 changes: 4 additions & 1 deletion examples/multimodal/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Elysia } from 'elysia';
import * as GeminiIntegration from '@fishjam-cloud/js-server-sdk/gemini';
import { peerController } from './controllers/peers';
import { FishjamService } from './service/fishjam';
import { MultimodalService } from './service/multimodal';
Expand All @@ -14,7 +15,9 @@ const fishjamConfig = {

const fishjam = await FishjamService.create(fishjamConfig);

new MultimodalService(fishjamConfig, process.env.GEMINI_API_KEY);
const ai = await GeminiIntegration.createClientAndValidate({ apiKey: process.env.GEMINI_API_KEY });

new MultimodalService(fishjamConfig, ai);

const app = new Elysia().use(peerController(fishjam)).listen(3000);

Expand Down
4 changes: 2 additions & 2 deletions examples/multimodal/src/service/multimodal.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ export class MultimodalService {
fishjamConfig: FishjamConfig;
fishjamClient: FishjamClient;

constructor(fishjamConfig: FishjamConfig, geminiKey: string) {
this.ai = GeminiIntegration.createClient({ apiKey: geminiKey });
constructor(fishjamConfig: FishjamConfig, ai: GoogleGenAI) {
this.ai = ai;
this.fishjamConfig = fishjamConfig;
this.fishjamClient = new FishjamClient(fishjamConfig);
this.initFishjam();
Expand Down
12 changes: 11 additions & 1 deletion examples/transcription/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Then you need to set the following variables:

- `FISHJAM_ID`: your Fishjam ID, which you can get at <https://fishjam.io>
- `FISHJAM_TOKEN`: your Fishjam management token, which you can get at <https://fishjam.io>
- `GEMINI_API_TOKEN`: your Gemini API token, which you can get at <https://aistudio.google.com/app/apikey>
- `GEMINI_API_KEY`: your Gemini API key, which you can get at <https://aistudio.google.com/app/apikey>

Once you've set up your environment variables, all you need to do is run the following command:

Expand All @@ -23,3 +23,13 @@ When the server is running, you can obtain peer tokens by going to <http://local

When you connect the created peers, you will see their transcriptions in the terminal as logs.
You can connect peers with the [fishjam minimal-react example](https://github.com/fishjam-cloud/web-client-sdk/tree/main/examples/react-client).

## Troubleshooting Gemini keys

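The key is validated at startup via `createClientAndValidate`, so an invalid,
unauthorized, or region-blocked key makes the server throw and exit immediately.
The same error is thrown when the validation request itself fails (e.g. no
network connectivity); inspect the error's `cause` to tell the two cases apart.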

If startup succeeds but the agent stays silent, the key may have been rejected
only by the Live native-audio model (a model-specific case the startup check
can't catch). Look in the logs for the close code reported by the
`onerror`/`onclose` callbacks (e.g. 1008 "your API key was reported as leaked",
or 1011) and try a freshly rotated key.
5 changes: 4 additions & 1 deletion examples/transcription/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Elysia } from 'elysia';
import * as GeminiIntegration from '@fishjam-cloud/js-server-sdk/gemini';
import { peerController } from './controllers/peers';
import { FishjamService } from './service/fishjam';
import { TranscriptionService } from './service/transcription';
Expand All @@ -14,7 +15,9 @@ const fishjamConfig = {

const fishjam = await FishjamService.create(fishjamConfig);

new TranscriptionService(fishjamConfig, process.env.GEMINI_API_KEY);
const genAi = await GeminiIntegration.createClientAndValidate({ apiKey: process.env.GEMINI_API_KEY });

new TranscriptionService(fishjamConfig, genAi);

const app = new Elysia().use(peerController(fishjam)).listen(3000);

Expand Down
4 changes: 2 additions & 2 deletions examples/transcription/src/service/transcription.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ export class TranscriptionService {
fishjamConfig: FishjamConfig;
fishjamClient: FishjamClient;

constructor(fishjamConfig: FishjamConfig, geminiKey: string) {
this.ai = GeminiIntegration.createClient({ apiKey: geminiKey });
constructor(fishjamConfig: FishjamConfig, ai: GoogleGenAI) {
this.ai = ai;
this.fishjamConfig = fishjamConfig;
this.fishjamClient = new FishjamClient(fishjamConfig);
this.initFishjam();
Expand Down
44 changes: 44 additions & 0 deletions packages/js-server-sdk/src/integrations/gemini.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ const SDK_NAME = 'fishjam-js-server-sdk';
* This module is a separate entry point (`@fishjam-cloud/js-server-sdk/gemini`),
* so `@google/genai` is only loaded when the consumer imports this module.
*
* Does not verify the API key against Google — use {@link createClientAndValidate}
* or call {@link checkCredentials} afterwards for that.
*
* @param options Configuration for the GoogleGenAI client.
* @returns A GoogleGenAI instance.
*/
Expand All @@ -30,6 +33,47 @@ export const createClient = (options: GoogleGenAIOptions): GoogleGenAI => {
return new GoogleGenAI(finalOptions);
};

/**
 * Probes the Gemini API with one authenticated `models.list` request to find
 * out whether the client's API key is usable.
 *
 * The returned promise resolves with no value when the request succeeds. Any
 * failure of that request — a rejected key as well as a request that could not
 * be completed (e.g. network/connectivity) — is rethrown as a new `Error`
 * whose `cause` holds the original failure.
 *
 * Note: a successful probe does not prove that the key may use a particular
 * Live native-audio model; such model-specific rejections are still reported
 * only through the `live.connect` session callbacks (`onerror`/`onclose`).
 *
 * @param client A GoogleGenAI instance, e.g. from {@link createClient}.
 * @throws Error when the `models.list` request fails; see `cause` for details.
 */
export const checkCredentials = async (client: GoogleGenAI): Promise<void> => {
  // Assembled up front so the try block contains nothing but the probe itself.
  const failureMessage = [
    'Could not verify the Gemini API key. The key may be invalid/unauthorized (check the key and that ',
    'the Gemini API is enabled for its project/region), or the request to Gemini failed (e.g. network ',
    'connectivity). See the cause for details.',
  ].join('');

  try {
    await client.models.list();
  } catch (reason) {
    // Keep the underlying failure attached so callers can inspect it.
    throw new Error(failureMessage, { cause: reason });
  }
};

/**
 * Builds a GoogleGenAI client with {@link createClient} and runs
 * {@link checkCredentials} on it before handing it back, so a misconfigured
 * key is reported at creation time rather than on first use.
 *
 * @param options Configuration for the GoogleGenAI client.
 * @returns The GoogleGenAI instance, once the credential check has passed.
 * @throws Error when {@link checkCredentials} fails, i.e. the key was rejected
 * or the verification request itself failed.
 */
export const createClientAndValidate = async (options: GoogleGenAIOptions): Promise<GoogleGenAI> => {
  const validatedClient = createClient(options);

  // Rejects (and therefore skips the return) when verification fails.
  await checkCredentials(validatedClient);

  return validatedClient;
};

/**
* Predefined audio settings for the agent's output track,
* configured for Gemini's 24kHz audio output.
Expand Down
23 changes: 23 additions & 0 deletions packages/js-server-sdk/tests/gemini.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import { describe, it, expect } from 'vitest';
import { checkCredentials } from '../src/integrations/gemini';
import type { GoogleGenAI } from '@google/genai';

// Minimal GoogleGenAI stand-in: only `models.list` exists, because that is the
// single member checkCredentials calls. No network access or API key involved.
const fakeClient = (list: () => Promise<unknown>) => {
  const stub = { models: { list } };
  return stub as unknown as GoogleGenAI;
};

describe('checkCredentials', () => {
  // Success path: `models.list` resolves, so the check resolves with no value.
  it('resolves when the key is accepted', async () => {
    const acceptingClient = fakeClient(() => Promise.resolve({}));

    await expect(checkCredentials(acceptingClient)).resolves.toBeUndefined();
  });

  // Failure path: the rejection from `models.list` must be wrapped, with the
  // original error kept as `cause`.
  it('throws a clear error wrapping the cause when verification fails', async () => {
    const cause = new Error('401 API key not valid');
    const rejectingClient = fakeClient(() => Promise.reject(cause));

    await expect(checkCredentials(rejectingClient)).rejects.toMatchObject({
      message: expect.stringContaining('Could not verify the Gemini API key'),
      cause,
    });
  });
});
Loading