microsoft · pelikhan · Jan 24, 2025 · Apr 19, 2025 · Apr 19, 2025
diff --git a/.github/workflows/genai-investigator.yml b/.github/workflows/genai-investigator.yml
@@ -36,23 +36,18 @@ jobs:
             - run: yarn install --frozen-lockfile
             - name: compile
               run: yarn compile
-            #
-            # Start Ollama in a docker container
-            #
-            - name: start ollama
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
             - name: genaiscript gai
-              run: node packages/cli/built/genaiscript.cjs run gai -pr ${{ github.event.workflow_run.pull_requests[0].number }} -prc --vars "workflow=${{ github.event.workflow_run.workflow_id }}" --vars "failure_run_id=${{ github.event.workflow_run.id }}" --out-trace $GITHUB_STEP_SUMMARY
+              run: node packages/cli/built/genaiscript.cjs run gai -p github -pr ${{ github.event.workflow_run.pull_requests[0].number }} -prc --vars "workflow=${{ github.event.workflow_run.workflow_id }}" --vars "failure_run_id=${{ github.event.workflow_run.id }}" --out-trace $GITHUB_STEP_SUMMARY
               env:
                   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
                   GENAISCRIPT_VAR_BRANCH: ${{ github.event.workflow_run.head_branch }}
             - name: genaiscript github-agent
-              run: node packages/cli/built/genaiscript.cjs run github-agent -pr ${{ github.event.workflow_run.pull_requests[0].number }} -prc --vars "workflow=${{ github.event.workflow_run.workflow_id }}" --vars "failure_run_id=${{ github.event.workflow_run.id }}" --out-trace $GITHUB_STEP_SUMMARY
+              run: node packages/cli/built/genaiscript.cjs run github-agent -p github -pr ${{ github.event.workflow_run.pull_requests[0].number }} -prc --vars "workflow=${{ github.event.workflow_run.workflow_id }}" --vars "failure_run_id=${{ github.event.workflow_run.id }}" --out-trace $GITHUB_STEP_SUMMARY
               env:
                   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
                   GENAISCRIPT_VAR_BRANCH: ${{ github.event.workflow_run.head_branch }}
             - name: genaiscript github-one
-              run: node packages/cli/built/genaiscript.cjs run github-one -pr ${{ github.event.workflow_run.pull_requests[0].number }} -prc --vars "workflow=${{ github.event.workflow_run.workflow_id }}" --vars "failure_run_id=${{ github.event.workflow_run.id }}" --out-trace $GITHUB_STEP_SUMMARY
+              run: node packages/cli/built/genaiscript.cjs run github-one -p github -pr ${{ github.event.workflow_run.pull_requests[0].number }} -prc --vars "workflow=${{ github.event.workflow_run.workflow_id }}" --vars "failure_run_id=${{ github.event.workflow_run.id }}" --out-trace $GITHUB_STEP_SUMMARY
               env:
                   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
                   GENAISCRIPT_VAR_BRANCH: ${{ github.event.workflow_run.head_branch }}
diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts
@@ -1,6 +1,11 @@
 // cspell: disable
 import { MarkdownTrace, TraceOptions } from "./trace"
-import { PromptImage, PromptPrediction, renderPromptNode } from "./promptdom"
+import {
+    PromptAudio,
+    PromptImage,
+    PromptPrediction,
+    renderPromptNode,
+} from "./promptdom"
 import { host, runtimeHost } from "./host"
 import { GenerationOptions } from "./generation"
 import { dispose } from "./dispose"
@@ -42,6 +47,7 @@ import { parseModelIdentifier, traceLanguageModelConnection } from "./models"
 import {
     ChatCompletionAssistantMessageParam,
     ChatCompletionContentPartImage,
+    ChatCompletionContentPartInputAudio,
     ChatCompletionMessageParam,
     ChatCompletionResponse,
     ChatCompletionsOptions,

diff --git a/packages/core/src/expander.ts b/packages/core/src/expander.ts
@@ -8,6 +8,7 @@ import { errorMessage, isCancelError, NotSupportedError } from "./error"
 import { JS_REGEX, MAX_TOOL_CALLS, PROMPTY_REGEX } from "./constants"
 import {
     finalizeMessages,
+    PromptAudio,
     PromptImage,
     PromptPrediction,
     renderPromptNode,
@@ -62,6 +63,7 @@ export async function callExpander(
     let logs = ""
     let messages: ChatCompletionMessageParam[] = []
     let images: PromptImage[] = []
+    let audios: PromptAudio[] = []
     let schemas: Record<string, JSONSchema> = {}
     let functions: ToolCallback[] = []
     let fileMerges: FileMergeHandler[] = []
@@ -142,6 +144,7 @@ export async function callExpander(
         statusText,
         messages,
         images,
+        audios,
         schemas,
         functions: Object.freeze(functions),
         fileMerges,
@@ -295,6 +298,7 @@ export async function expandTemplate(
 
     const { status, statusText, messages } = prompt
     const images = prompt.images.slice(0)
+    const audios = prompt.audios.slice(0)
     const schemas = structuredClone(prompt.schemas)
     const tools = prompt.functions.slice(0)
     const fileMerges = prompt.fileMerges.slice(0)
@@ -372,6 +376,7 @@ export async function expandTemplate(
             )
 
             if (sysr.images) images.push(...sysr.images)
+            if (sysr.audios) audios.push(...sysr.audios)
             if (sysr.schemas) Object.assign(schemas, sysr.schemas)
             if (sysr.functions) tools.push(...sysr.functions)
             if (sysr.fileMerges) fileMerges.push(...sysr.fileMerges)
@@ -424,6 +429,7 @@ export async function expandTemplate(
         cache,
         messages,
         images,
+        audios,
         schemas,
         tools,
         status: <GenerationStatus>status,

diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts
@@ -63,6 +63,7 @@ export interface PromptNode extends ContextExpansionOptions {
     type?:
         | "text"
         | "image"
+        | "audio"
         | "schema"
         | "tool"
         | "fileMerge"
@@ -168,6 +169,18 @@ export interface PromptImageNode extends PromptNode {
     resolved?: PromptImage // Resolved image information
 }
 
+export interface PromptAudio {
+    filename?: string // Optional source filename; used to label the audio entry in the trace
+    data: string // Audio payload as a string (presumably base64-encoded — TODO confirm against the producer)
+    format: "mp3" | "wav" // Audio format; only "mp3" and "wav" are accepted
+}
+
+export interface PromptAudioNode extends PromptNode {
+    type: "audio"
+    value: Awaitable<PromptAudio> // Audio information
+    resolved?: PromptAudio // Resolved audio information
+}
+
 // Interface for a schema node.
 export interface PromptSchemaNode extends PromptNode {
     type: "schema"
@@ -691,6 +704,7 @@ export interface PromptNodeVisitor {
     def?: (node: PromptDefNode) => Awaitable<void> // Definition node visitor
     defData?: (node: PromptDefDataNode) => Awaitable<void> // Definition data node visitor
     image?: (node: PromptImageNode) => Awaitable<void> // Image node visitor
+    audio?: (node: PromptAudioNode) => Awaitable<void> // Audio node visitor
     schema?: (node: PromptSchemaNode) => Awaitable<void> // Schema node visitor
     tool?: (node: PromptToolNode) => Awaitable<void> // Function node visitor
     fileMerge?: (node: PromptFileMergeNode) => Awaitable<void> // File merge node visitor
@@ -720,6 +734,9 @@ export async function visitNode(node: PromptNode, visitor: PromptNodeVisitor) {
         case "image":
             await visitor.image?.(node as PromptImageNode)
             break
+        case "audio":
+            await visitor.audio?.(node as PromptAudioNode)
+            break
         case "schema":
             await visitor.schema?.(node as PromptSchemaNode)
             break
@@ -766,6 +783,7 @@ export async function visitNode(node: PromptNode, visitor: PromptNodeVisitor) {
 
 interface PromptNodeRender {
     images: PromptImage[] // Images included in the prompt
+    audios: PromptAudio[]
     errors: unknown[] // Errors encountered during rendering
     schemas: Record<string, JSONSchema> // Schemas included in the prompt
     tools: ToolCallback[] // tools included in the prompt
@@ -986,6 +1004,15 @@ async function resolvePromptNode(
                 n.error = e
             }
         },
+        audio: async (n) => {
+            try {
+                const v = await n.value // value is an Awaitable<PromptAudio>; wait for it here
+                n.resolved = v
+                n.preview = n.resolved ? `<audio />` : undefined // placeholder preview, set only when audio resolved
+            } catch (e) {
+                n.error = e // record the failure on the node rather than rethrowing
+            }
+        },
     })
     return { errors: err }
 }
@@ -1358,6 +1385,7 @@ export async function renderPromptNode(
     ) => appendAssistantMessage(messages, content, options)
 
     const images: PromptImage[] = []
+    const audios: PromptAudio[] = []
     const errors: unknown[] = []
     const schemas: Record<string, JSONSchema> = {}
     const tools: ToolCallback[] = []
@@ -1415,6 +1443,16 @@ export async function renderPromptNode(
                 appendUser(value, n)
             }
         },
+        audio: async (n) => {
+            const value = n.resolved // resolved audio, if any; nodes without data are ignored below
+            if (value?.data) {
+                audios.push(value) // collected into the `audios` array returned with the render result
+                if (trace) {
+                    trace.startDetails(`🎤 audio ${value.filename || ""}`) // opened and closed immediately: an empty details entry titled with the filename
+                    trace.endDetails()
+                }
+            }
+        },
         schema: (n) => {
             const { name: schemaName, value: schema, options } = n
             if (schemas[schemaName])
@@ -1513,6 +1551,7 @@ ${trimNewlines(schemaText)}
 
     const res = Object.freeze<PromptNodeRender>({
         images,
+        audios,
         schemas,
         tools,
         fileMerges,

diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts
@@ -1070,6 +1070,8 @@ export function createChatGenerationContext(
                         )
                         if (sysr.images?.length)
                             throw new NotSupportedError("images")
+                        if (sysr.audios?.length)
+                            throw new NotSupportedError("audios")
                         if (sysr.schemas) Object.assign(schemas, sysr.schemas)
                         if (sysr.functions) tools.push(...sysr.functions)
                         if (sysr.fileMerges?.length)

diff --git a/packages/sample/genaisrc/github-agent.genai.mts b/packages/sample/genaisrc/github-agent.genai.mts
@@ -29,7 +29,7 @@ if (jobUrl) {
     $`1. Find the failed run ${failure_run_id} of ${workflow} for branch ${branch}
     2. Find the last successful run before the failed run for the same workflow and branch`
 } else {
-    $`0. Find the worflow ${workflow} in the repository
+    $`0. Find the workflow ${workflow} in the repository
 1. Find the latest failed run of ${workflow} for branch ${branch}
 2. Find the last successful run before the failed run`
 }
@@ -38,7 +38,7 @@ $`3. Compare the run job logs between the failed run and the last successful run
     - show a diff of the source code that created the problem if possible
 5. Analyze all the above information and identify the root cause of the failure
     - generate a patch to fix the problem if possible
-6. Generate a detailled report of the failure and the root cause
+6. Generate a detailed report of the failure and the root cause
     - include a list of all HTML urls to the relevant runs, commits, pull requests or issues
     - include diff of code changes
     - include the patch if generated