diff --git a/.vscode/settings.json b/.vscode/settings.json index 3119064..c5df53a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,6 @@ { "editor.defaultFormatter": "biomejs.biome", - "editor.formatOnSave": false + "editor.formatOnSave": true, + "github.gitAuthentication": false, + "github.gitProtocol": "https" } diff --git a/biome.json b/biome.json index c4c754b..f585f84 100644 --- a/biome.json +++ b/biome.json @@ -38,4 +38,4 @@ "trailingComma": "none" } } -} +} \ No newline at end of file diff --git a/package.json b/package.json index d0c28b1..4b0671f 100644 --- a/package.json +++ b/package.json @@ -9,15 +9,18 @@ "@anthropic-ai/tokenizer": "^0.0.4", "@inquirer/prompts": "^4.3.0", "cross-spawn": "^7.0.3", + "directory-tree": "^3.5.1", + "mime-types": "^2.1.35", + "worker-threads": "^1.0.0", "typescript": "^5.4.3", "youtube-transcript": "^1.1.0" }, "devDependencies": { "@biomejs/biome": "latest", "@swc/core": "^1.4.8", - "@types/node": "^20.11.30", "@types/bun": "latest", "@types/cross-spawn": "^6.0.6", + "@types/node": "^20.11.30", "tsup": "^8.0.2" }, "main": "dist/app.js", @@ -29,9 +32,11 @@ "package": "tsup", "check": "biome check --apply ." 
}, - "files": ["dist"], + "files": [ + "dist" + ], "type": "module", "peerDependencies": { "typescript": "^5.0.0" } -} +} \ No newline at end of file diff --git a/src/ai.ts b/src/ai.ts index 2c73ee6..310864c 100644 --- a/src/ai.ts +++ b/src/ai.ts @@ -3,15 +3,8 @@ import path from "node:path"; import Anthropic from "@anthropic-ai/sdk"; import { MessageParam } from "@anthropic-ai/sdk/resources"; import { countTokens } from "@anthropic-ai/tokenizer"; -import { - MESSAGES_FOLDER, - NUMBER_OF_CHARACTERS_TO_FLUSH_TO_FILE, - lumentisFolderPath -} from "./constants"; -import { - getOutlineInferenceMessages, - getPageGenerationInferenceMessages -} from "./prompts"; +import { MESSAGES_FOLDER, NUMBER_OF_CHARACTERS_TO_FLUSH_TO_FILE, lumentisFolderPath } from "./constants"; +import { getOutlineInferenceMessages, getPageGenerationInferenceMessages } from "./prompts"; import { Outline } from "./types"; import { partialParse } from "./utils"; @@ -31,20 +24,12 @@ export async function runClaudeInference( if (saveName) { if (!fs.existsSync(messageBackupSpot)) fs.mkdirSync(messageBackupSpot); - fs.writeFileSync( - path.join(messageBackupSpot, saveName + ".json"), - JSON.stringify(messages, null, 2) - ); + fs.writeFileSync(path.join(messageBackupSpot, saveName + ".json"), JSON.stringify(messages, null, 2)); } // remove trailing whitespace from last message - if ( - messages[messages.length - 1] && - messages[messages.length - 1].role === "assistant" - ) { - messages[messages.length - 1].content = ( - messages[messages.length - 1].content as string - ).trimEnd(); + if (messages[messages.length - 1] && messages[messages.length - 1].role === "assistant") { + messages[messages.length - 1].content = (messages[messages.length - 1].content as string).trimEnd(); } try { @@ -60,21 +45,13 @@ export async function runClaudeInference( let fullMessage = ""; let diffToFlush = 0; - if (streamToConsole) - process.stdout.write( - `\n\nStreaming from ${model}${ - saveToFilepath ? 
` to ${saveToFilepath}` : "" - }: ` - ); + if (streamToConsole) process.stdout.write(`\n\nStreaming from ${model}${saveToFilepath ? ` to ${saveToFilepath}` : ""}: `); for await (const chunk of response) { const chunkText = - (chunk.type === "content_block_start" && chunk.content_block.text) || - (chunk.type === "content_block_delta" && chunk.delta.text) || - ""; + (chunk.type === "content_block_start" && chunk.content_block.text) || (chunk.type === "content_block_delta" && chunk.delta.text) || ""; - if (chunk.type === "message_delta") - outputTokens += chunk.usage.output_tokens; + if (chunk.type === "message_delta") outputTokens += chunk.usage.output_tokens; if (streamToConsole) process.stdout.write(chunkText); @@ -173,10 +150,7 @@ export async function runClaudeInference( if (saveName) { if (!fs.existsSync(messageBackupSpot)) fs.mkdirSync(messageBackupSpot); - fs.writeFileSync( - path.join(messageBackupSpot, saveName + "_response" + ".txt"), - fullMessage - ); + fs.writeFileSync(path.join(messageBackupSpot, saveName + "_response" + ".txt"), fullMessage); } if (saveToFilepath) { @@ -198,34 +172,19 @@ export async function runClaudeInference( } } -export function getClaudeCosts( - messages: MessageParam[], - outputTokensExpected: number, - model: string -) { +export function getClaudeCosts(messages: MessageParam[], outputTokensExpected: number, model: string) { const inputText: string = messages.map((m) => m.content).join("\n"); return getClaudeCostsFromText(inputText, outputTokensExpected, model); } -export function getClaudeCostsFromText( - inputPrompt: string, - outputTokensExpected: number, - model: string -) { +export function getClaudeCostsFromText(inputPrompt: string, outputTokensExpected: number, model: string) { const inputTokens = countTokens(inputPrompt); return getClaudeCostsWithTokens(inputTokens, outputTokensExpected, model); } -function getClaudeCostsWithTokens( - inputTokens: number, - outputTokens: number, - model: string -) { - const priceList: 
Record< - string, - { inputTokensPerM: number; outputTokensPerM } - > = { +function getClaudeCostsWithTokens(inputTokens: number, outputTokens: number, model: string) { + const priceList: Record = { "claude-3-opus-20240229": { inputTokensPerM: 15, outputTokensPerM: 75 @@ -265,23 +224,15 @@ export const CLAUDE_PRIMARYSOURCE_BUDGET = (() => { { title: "formatResponseMessage", permalink: "format-response-message", - singleSentenceDescription: - "Information on the formatResponseMessage utility function and its purpose.", + singleSentenceDescription: "Information on the formatResponseMessage utility function and its purpose.", disabled: false } ] }; - const writingMessages = getPageGenerationInferenceMessages( - outlineMessages, - outline, - outline.sections[0], - true - ); + const writingMessages = getPageGenerationInferenceMessages(outlineMessages, outline, outline.sections[0], true); - const writingTokens = countTokens( - writingMessages.map((m) => m.content).join("\n") - ); + const writingTokens = countTokens(writingMessages.map((m) => m.content).join("\n")); const OUTLINE_BUDGET = 4096 * 3; diff --git a/src/app.ts b/src/app.ts index eb5e27e..79c4fd6 100644 --- a/src/app.ts +++ b/src/app.ts @@ -3,30 +3,30 @@ import fs from "node:fs"; import path from "node:path"; import { countTokens } from "@anthropic-ai/tokenizer"; -import { YoutubeTranscript } from 'youtube-transcript'; -import { - Separator, - checkbox, - confirm, - editor, - input, - password, - select -} from "@inquirer/prompts"; -import { - CLAUDE_PRIMARYSOURCE_BUDGET, - getClaudeCosts, - runClaudeInference -} from "./ai"; +import { Separator, checkbox, confirm, editor, input, password, select } from "@inquirer/prompts"; +import dirTree from "directory-tree"; +import { YoutubeTranscript } from "youtube-transcript"; +import { CLAUDE_PRIMARYSOURCE_BUDGET, getClaudeCosts, runClaudeInference } from "./ai"; import { CLAUDE_MODELS, EDITORS, LUMENTIS_FOLDER, + MAX_TOKEN_LIMIT, RUNNERS, WRITING_STYLE_SIZE_LIMIT, 
lumentisFolderPath, wizardStatePath } from "./constants"; +import { + allExclusions, + checkFileIsReadable, + combineFilesToString, + flattenFileTreeForCheckbox, + getAdditionalPromptTokens, + getFileTree, + removeDeselectedItems, + removeExcludedFilesAndAddTokenCount +} from "./folder-importing/utils"; import { generatePages, idempotentlySetupNextraDocs } from "./page-generator"; import { getAudienceInferenceMessages, @@ -38,12 +38,7 @@ import { getThemeInferenceMessages, getTitleInferenceMessages } from "./prompts"; -import { - Outline, - OutlineSection, - ReadyToGeneratePage, - WizardState -} from "./types"; +import { Outline, OutlineSection, ReadyToGeneratePage, WizardState } from "./types"; import { isCommandAvailable, parsePlatformIndependentPath } from "./utils"; async function runWizard() { @@ -52,9 +47,7 @@ async function runWizard() { fs.writeFileSync(wizardStatePath, JSON.stringify(state, null, 2)); } - const wizardState: WizardState = fs.existsSync(wizardStatePath) - ? JSON.parse(fs.readFileSync(wizardStatePath, "utf-8")) - : {}; + const wizardState: WizardState = fs.existsSync(wizardStatePath) ? JSON.parse(fs.readFileSync(wizardStatePath, "utf-8")) : {}; // prettier-ignore console.log( @@ -74,9 +67,7 @@ async function runWizard() { }); if (!wizardState.gotDirectoryPermission) { - console.log( - "No problem! Start me again in a clean directory. Bye for now!" - ); + console.log("No problem! Start me again in a clean directory. 
Bye for now!"); return; } } @@ -86,8 +77,7 @@ async function runWizard() { // Ask for AI model to use wizardState.smarterModel = await select({ - message: - "Pick a model for meta inference.\n Smarter is preferred, you can use a cheaper model for the actual writing later.", + message: "Pick a model for meta inference.\n Smarter is preferred, you can use a cheaper model for the actual writing later.", choices: [ ...CLAUDE_MODELS.map((model) => ({ name: model.name, @@ -104,8 +94,7 @@ async function runWizard() { // Ask to stream output to console wizardState.streamToConsole = await confirm({ - message: - "Do you want to stream outputs to console? \n Looks awesome but clutters things up:", + message: "Do you want to stream outputs to console? \n Looks awesome but clutters things up:", default: wizardState.streamToConsole || false, transformer: (answer) => (answer ? "👍" : "👎") }); @@ -117,44 +106,157 @@ async function runWizard() { const fileName = await input({ message: - "What's your primary source? \n Drag a text file (or youtube link, experimental) in here, or leave empty/whitespace to open an editor: ", - default: wizardState.primarySourceFilename || undefined, + "What's your primary source? 
\n Drag a folder or text file (or youtube link, experimental) in here, or leave empty/whitespace to open an editor: ", + default: wizardState.primarySourceAccessName || undefined, validate: async (filename) => { - if(filename?.trim()) { - if((filename === wizardState.primarySourceFilename || filename === parsePlatformIndependentPath(filename)) && wizardState.loadedPrimarySource) return true; + if (filename?.trim()) { + if ( + (filename === wizardState.primarySourceAccessName || + wizardState.primarySourceAccessName === parsePlatformIndependentPath(filename)) && + wizardState.loadedPrimarySource + ) + return true; // return true if we've already loaded the file and it's the same if (filename.includes("youtube.com")) { try { const transcript = await YoutubeTranscript.fetchTranscript(filename); wizardState.loadedPrimarySource = transcript.map((line) => line.text).join("\n"); - wizardState.primarySourceFilename = filename; - } catch(err) { + wizardState.primarySourceAccessName = filename; + wizardState.primarySourceType = "youtube"; + } catch (err) { return `Looked like a youtube video - Couldn't fetch transcript from ${filename}: ${err}`; } - } else if(!fs.existsSync(parsePlatformIndependentPath(filename))) { - return `File not found - tried to load ${filename}. Try again.`; } else { - try { - const dataFromFile = fs.readFileSync(parsePlatformIndependentPath(filename), "utf-8"); - wizardState.loadedPrimarySource = dataFromFile; - wizardState.primarySourceFilename = parsePlatformIndependentPath(filename); - } catch(err) { - return `Couldn't read file - tried to load ${filename}. Try again.`; + const parsed_filename = parsePlatformIndependentPath(filename); + if (!fs.existsSync(parsed_filename)) return `File or folder not found - tried to load ${filename}. Try again.`; + + const file_stats = fs.lstatSync(parsed_filename); + if (file_stats.isFile()) { + if (!checkFileIsReadable(parsed_filename)) { + return `File type not supported - tried to load ${filename}. 
Try again.`; + } else { + try { + const dataFromFile = fs.readFileSync(parsePlatformIndependentPath(filename), "utf-8"); + wizardState.loadedPrimarySource = dataFromFile; + wizardState.primarySourceAccessName = parsePlatformIndependentPath(filename); + wizardState.primarySourceType = "file"; + } catch (err) { + return `Couldn't read file - tried to load ${filename}. Try again.`; + } + } + } else if (file_stats.isDirectory()) { + wizardState.primarySourceAccessName = parsed_filename; + wizardState.primarySourceType = "folder"; + } else if (!file_stats.isDirectory() && !file_stats.isFile()) { + return `Doesn't seem to be a file or a directory - tried to load ${filename}. Try again.`; } } } - return true; + return true; // do we need to include this? I think } }); - saveState(wizardState); - if(!wizardState.loadedPrimarySource) { + + // ________________________________________FOLDER MANAGEMENT SECTION____________________________________ + + if (wizardState.primarySourceAccessName && wizardState.primarySourceType === "folder") { + const parsed_filename = parsePlatformIndependentPath(wizardState.primarySourceAccessName); + let fileTree: dirTree.DirectoryTree | "timeoutFailed" = await getFileTree(parsed_filename); + + if (!fileTree || fileTree === "timeoutFailed") { + console.log("\nThe file tree is too large to process in a reasonable time. Exiting."); + return; // Return if the timeout is reached + } + + let completedTreeWork: { result: boolean; tokenTotal: number; tree: dirTree.DirectoryTree } | "timeoutFailed" = + await removeExcludedFilesAndAddTokenCount(fileTree); + + if (!completedTreeWork || completedTreeWork === "timeoutFailed" || !fileTree.children) { + console.log( + completedTreeWork && completedTreeWork !== "timeoutFailed" + ? "\nNo files found in directory. Try again." + : "\nThe file tree is too large to process in a reasonable time." 
+ ); + return; + } + + fileTree = completedTreeWork.tree; + let first_time = true; + let selectedFiles: string[] = []; + let file_choices: { name: string; value: string; checked: boolean }[] | "timeoutFailed" = await flattenFileTreeForCheckbox(fileTree); + + if (!file_choices || file_choices === "timeoutFailed") { + console.log("\nThe file tree is too large to process in a reasonable time. Exiting."); + return; // Return if the timeout is reached + } + + let promptTokens = getAdditionalPromptTokens(file_choices); + + // Loop until the files selected are within the token limit + while (first_time || completedTreeWork.tokenTotal + promptTokens > CLAUDE_PRIMARYSOURCE_BUDGET) { + if (!first_time) { + console.log("\nYou've selected too many tokens. Please deselect files to exclude."); + } + first_time = false; + + console.log("before checkbox await"); + + selectedFiles = await checkbox({ + pageSize: 8, + loop: false, + message: `The token limit is ${CLAUDE_PRIMARYSOURCE_BUDGET.toLocaleString()}. +Your current file token count is ${completedTreeWork.tokenTotal.toLocaleString()}, with ${promptTokens.toLocaleString()} for the prompt, for a total of ${( + completedTreeWork.tokenTotal + promptTokens + ).toLocaleString()}. +Please deselect files to exclude. +Note: If you deselect a folder, all files within it will be excluded. +Note: Some files do not appear as we don't believe we can read them.`, + choices: file_choices, + theme: { style: { renderSelectedChoices: () => { } } } + }); + + console.log("after checkbox await"); + + completedTreeWork = await removeDeselectedItems(fileTree, selectedFiles); + if (!completedTreeWork || completedTreeWork === "timeoutFailed" || !fileTree.children) { + console.log( + completedTreeWork && completedTreeWork !== "timeoutFailed" + ? "No files found in directory. Try again." + : "The file tree is too large to process in a reasonable time." 
+ ); + return; + } + fileTree = completedTreeWork.tree; + file_choices = await flattenFileTreeForCheckbox(fileTree); + if (!file_choices || file_choices === "timeoutFailed") { + console.log("The file tree is too large to process in a reasonable time. Exiting."); + return; // Return if the timeout is reached + } + promptTokens = getAdditionalPromptTokens(file_choices); + } + + const confirmFiles = await confirm({ + message: `${file_choices.map((val) => val.name).join("\n")}\nHere is your list of files. Confirm?`, + default: wizardState.streamToConsole || false, + transformer: (answer) => (answer ? "👍" : "👎") + }); + + if (!confirmFiles) { + console.log("\nNo problem! You can run me again to adjust the source."); + return; + } + + // TODO: Is this the best way to handle this? + wizardState.loadedPrimarySource = combineFilesToString(file_choices); + saveState(wizardState); + + console.log("Your source has a token count of:", countTokens(wizardState.loadedPrimarySource)); + } + + if (!wizardState.loadedPrimarySource) { const editorName = await select({ - message: - "Because there's a chance you never changed $EDITOR from vim, pick an editor!", - choices: EDITORS.filter((editor) => - isCommandAvailable(editor.command) - ).map((editor) => ({ + message: "Because there's a chance you never changed $EDITOR from vim, pick an editor!", + choices: EDITORS.filter((editor) => isCommandAvailable(editor.command)).map((editor) => ({ name: editor.name, value: editor.command })), @@ -174,6 +276,7 @@ async function runWizard() { }); wizardState.loadedPrimarySource = dataFromEditor; + wizardState.primarySourceType = "freetext"; } saveState(wizardState); @@ -182,9 +285,8 @@ async function runWizard() { if (primarySourceTokens > CLAUDE_PRIMARYSOURCE_BUDGET) { wizardState.ignorePrimarySourceSize = await confirm({ - message: `Your content looks a little too large by about ${ - primarySourceTokens - CLAUDE_PRIMARYSOURCE_BUDGET - } tokens (leaving some wiggle room). 
Generation might fail (if it does, you can always restart and adjust the source). Continue anyway?`, + message: `Your content looks a little too large by about ${primarySourceTokens - CLAUDE_PRIMARYSOURCE_BUDGET + } tokens (leaving some wiggle room). Generation might fail (if it does, you can always restart and adjust the source). Continue anyway?`, default: wizardState.ignorePrimarySourceSize || false, transformer: (answer) => (answer ? "👍" : "👎") }); @@ -199,8 +301,7 @@ async function runWizard() { wizardState.anthropicKey = (await password({ - message: - "Please enter an Anthropic API key.\n (You can leave this blank if it's already in the ENV variable.): ", + message: "Please enter an Anthropic API key.\n (You can leave this blank if it's already in the ENV variable.): ", mask: "*", validate: async (key) => { const testResponse = await runClaudeInference( @@ -219,9 +320,7 @@ async function runWizard() { // Ask for source description - const descriptionInferenceMessages = getDescriptionInferenceMessages( - wizardState.loadedPrimarySource - ); + const descriptionInferenceMessages = getDescriptionInferenceMessages(wizardState.loadedPrimarySource); const description = await input({ message: `Do you have a short description of your source?\n Who's talking, what type of content is it etc.\n (Leave empty to generate - costs $${getClaudeCosts( @@ -245,9 +344,7 @@ async function runWizard() { ); if (generatedDescription.success) { - console.log( - `Generated description \n(edit this in ${wizardStatePath} if you need to and restart!): ${generatedDescription.response}\n\n` - ); + console.log(`Generated description \n(edit this in ${wizardStatePath} if you need to and restart!): ${generatedDescription.response}\n\n`); wizardState.description = generatedDescription.response; } else { @@ -261,13 +358,9 @@ async function runWizard() { saveState(wizardState); - if (!wizardState.description?.trim()) - throw new Error("Can't continue without a description!"); + if 
(!wizardState.description?.trim()) throw new Error("Can't continue without a description!"); - const titleInferenceMessages = getTitleInferenceMessages( - wizardState.loadedPrimarySource, - wizardState.description - ); + const titleInferenceMessages = getTitleInferenceMessages(wizardState.loadedPrimarySource, wizardState.description); // Ask for title @@ -301,17 +394,10 @@ async function runWizard() { name: title, value: title })) - .concat([ - new Separator(), - { name: "Enter a new one", value: "__new__" }, - new Separator() - ]) + .concat([new Separator(), { name: "Enter a new one", value: "__new__" }, new Separator()]) }); - wizardState.title = - selectedAnswer === "__new__" - ? await input({ message: "Enter a new title: " }) - : selectedAnswer; + wizardState.title = selectedAnswer === "__new__" ? await input({ message: "Enter a new title: " }) : selectedAnswer; } else { wizardState.title = await input({ message: `Couldn't generate. Please type one in? `, @@ -325,12 +411,10 @@ async function runWizard() { // Ask for favicon URL - const urlPattern = - /^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$/; + const urlPattern = /^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$/; wizardState.faviconUrl = await input({ message: "Choose your own favicon! 
\nPlease provide a URL only.", - default: - "https://raw.githubusercontent.com/HebeHH/lumentis/choose-favicon/assets/default-favicon.png", + default: "https://raw.githubusercontent.com/HebeHH/lumentis/choose-favicon/assets/default-favicon.png", // change the default to the permanent raw URL of assets/default-favicon.png, once on github validate: (favicon_url) => { if (!urlPattern.test(favicon_url.trim())) { @@ -346,9 +430,7 @@ async function runWizard() { // Ask for theme/keywords - const themesInferenceMessages = getThemeInferenceMessages( - wizardState.loadedPrimarySource - ); + const themesInferenceMessages = getThemeInferenceMessages(wizardState.loadedPrimarySource); const themesFromUser = await input({ message: `Do you have any core themes or keywords about the source or the intended audience?\n (Leave empty to generate - costs $${getClaudeCosts( @@ -386,11 +468,7 @@ async function runWizard() { message: "Enter any more (leave empty for none): " }); - wizardState.coreThemes = ( - selectedThemes.join(", ") + - " " + - newThemesFromUser - ).trim(); + wizardState.coreThemes = (selectedThemes.join(", ") + " " + newThemesFromUser).trim(); } else { wizardState.coreThemes = await input({ message: `Couldn't generate. Please type some in? 
`, @@ -404,10 +482,7 @@ async function runWizard() { // Ask for Audience - const audienceInferenceMessages = getAudienceInferenceMessages( - wizardState.loadedPrimarySource, - wizardState.description - ); + const audienceInferenceMessages = getAudienceInferenceMessages(wizardState.loadedPrimarySource, wizardState.description); const audienceFromUser = await input({ message: `Do you have any intended audience in mind?\n (Leave empty to generate - costs $${getClaudeCosts( @@ -415,9 +490,7 @@ async function runWizard() { 400, wizardState.smarterModel ).toFixed(4)}): `, - default: - (wizardState.intendedAudience && wizardState.intendedAudience) || - undefined + default: (wizardState.intendedAudience && wizardState.intendedAudience) || undefined }); if (audienceFromUser.trim()) { @@ -447,17 +520,12 @@ async function runWizard() { message: "Enter any more (leave empty for none): " }); - wizardState.intendedAudience = ( - selectedAudience.join(", ") + - " " + - newAudienceFromUser - ).trim(); + wizardState.intendedAudience = (selectedAudience.join(", ") + " " + newAudienceFromUser).trim(); } else { wizardState.intendedAudience = await input({ message: `Couldn't generate. Please type some keywords in? `, default: wizardState.intendedAudience, - validate: (input) => - !!input.trim() || "Please enter some words describing the audience." + validate: (input) => !!input.trim() || "Please enter some words describing the audience." }); } } @@ -466,20 +534,15 @@ async function runWizard() { // AI asks questions back - const questionsMessages = getQuestionsInferenceMessages( - wizardState.loadedPrimarySource, - wizardState.description, - wizardState.ambiguityExplained - ); + const questionsMessages = getQuestionsInferenceMessages(wizardState.loadedPrimarySource, wizardState.description, wizardState.ambiguityExplained); const questionPermission = await confirm({ - message: `Are you okay ${ - wizardState.ambiguityExplained ? 
"re" : "" - }answering some questions about things that might not be well explained in the primary source?\n (Costs ${getClaudeCosts( - questionsMessages, - 2048, - wizardState.smarterModel - ).toFixed(4)}): `, + message: `Are you okay ${wizardState.ambiguityExplained ? "re" : "" + }answering some questions about things that might not be well explained in the primary source?\n (Costs ${getClaudeCosts( + questionsMessages, + 2048, + wizardState.smarterModel + ).toFixed(4)}): `, default: false, transformer: (answer) => (answer ? "👍" : "👎") }); @@ -498,11 +561,8 @@ async function runWizard() { if (questionsResponse.success) { if (!wizardState.preferredEditor) { const editorName = await select({ - message: - "Because there's a chance you never changed $EDITOR from vim, pick an editor!", - choices: EDITORS.filter((editor) => - isCommandAvailable(editor.command) - ).map((editor) => ({ + message: "Because there's a chance you never changed $EDITOR from vim, pick an editor!", + choices: EDITORS.filter((editor) => isCommandAvailable(editor.command)).map((editor) => ({ name: editor.name, value: editor.command })), @@ -518,15 +578,11 @@ async function runWizard() { message: `Opening ${process.env.EDITOR} to answer:`, waitForUseInput: false, default: `Here are some questions: \n${questionsResponse.response - .map( - (question: string, index: number) => - `${index + 1}. ${question}\n\nAnswer: \n\n` - ) + .map((question: string, index: number) => `${index + 1}. ${question}\n\nAnswer: \n\n`) .join("\n")}` }); - wizardState.ambiguityExplained = - (wizardState.ambiguityExplained || "") + dataFromEditor; + wizardState.ambiguityExplained = (wizardState.ambiguityExplained || "") + dataFromEditor; } else { console.log("\n\nCould not generate. 
Lets skip this for now."); } @@ -537,33 +593,20 @@ async function runWizard() { // Ask for writing style const writingExampleFilename = await input({ - message: - "Do you have an example of writing style you want to add in (adds cost but improves output, \nleave blank to skip. Drag in a file): ", + message: "Do you have an example of writing style you want to add in (adds cost but improves output, \nleave blank to skip. Drag in a file): ", default: wizardState.writingExampleFilename || undefined, validate: (filename) => { - if ( - filename?.trim() && - !fs.existsSync(parsePlatformIndependentPath(filename)) - ) - return `File not found - tried to load ${filename}. Try again.`; + if (filename?.trim() && !fs.existsSync(parsePlatformIndependentPath(filename))) return `File not found - tried to load ${filename}. Try again.`; return true; } }); if (writingExampleFilename.trim()) { - wizardState.writingExampleFilename = parsePlatformIndependentPath( - writingExampleFilename - ); + wizardState.writingExampleFilename = parsePlatformIndependentPath(writingExampleFilename); - const dataFromFile = fs.readFileSync( - wizardState.writingExampleFilename, - "utf-8" - ); + const dataFromFile = fs.readFileSync(wizardState.writingExampleFilename, "utf-8"); - wizardState.writingExample = dataFromFile.substring( - 0, - WRITING_STYLE_SIZE_LIMIT - ); + wizardState.writingExample = dataFromFile.substring(0, WRITING_STYLE_SIZE_LIMIT); } saveState(wizardState); @@ -580,25 +623,19 @@ async function runWizard() { wizardState.writingExample ); - const previousOutlineInvalidated = - wizardState.outlinePrimaryPrompt && - wizardState.outlinePrimaryPrompt !== outlineQuestions[0].content; + const previousOutlineInvalidated = wizardState.outlinePrimaryPrompt && wizardState.outlinePrimaryPrompt !== outlineQuestions[0].content; if (!wizardState.generatedOutline || previousOutlineInvalidated) { const confirmOutline = await confirm({ - message: `We're about to generate the outline (Costs 
$${getClaudeCosts( - outlineQuestions, - 4096, - wizardState.smarterModel - ).toFixed(4)}). Confirm: `, + message: `We're about to generate the outline (Costs $${getClaudeCosts(outlineQuestions, 4096, wizardState.smarterModel).toFixed( + 4 + )}). Confirm: `, default: true, transformer: (answer) => (answer ? "👍" : "👎") }); if (!confirmOutline) { - console.log( - "No problem! You can run me again to generate the outline later." - ); + console.log("No problem! You can run me again to generate the outline later."); return; } @@ -618,25 +655,18 @@ async function runWizard() { if (outlineResponse.success) { wizardState.generatedOutline = outlineResponse.response; } else { - console.log( - "Couldn't generate the outline. You can run me again to retry." - ); + console.log("Couldn't generate the outline. You can run me again to retry."); return; } } saveState(wizardState); - function deleteDisabledSectionsAndClean( - sections: OutlineSection[] - ): OutlineSection[] { + function deleteDisabledSectionsAndClean(sections: OutlineSection[]): OutlineSection[] { return sections .filter((section) => !section.disabled) .map((section) => { - if (section.subsections) - section.subsections = deleteDisabledSectionsAndClean( - section.subsections - ); + if (section.subsections) section.subsections = deleteDisabledSectionsAndClean(section.subsections); delete section.disabled; return section; }); @@ -669,29 +699,17 @@ async function runWizard() { const flattened = [ { - name: `${"-".repeat(levels.length + 1)} ${counter}. ${ - section.title - }`, + name: `${"-".repeat(levels.length + 1)} ${counter}. 
${section.title}`, value: levels.concat([section.permalink]).join("->"), checked: !section.disabled } ]; - if (section.subsections) - return flattened.concat( - flattenOutline( - section.subsections, - levels.concat([section.permalink]), - hideDisabled - ) - ); + if (section.subsections) return flattened.concat(flattenOutline(section.subsections, levels.concat([section.permalink]), hideDisabled)); return flattened; }); } - const outlineFlatList = flattenOutline( - wizardState.generatedOutline.sections, - [] - ); + const outlineFlatList = flattenOutline(wizardState.generatedOutline.sections, []); const selectedSections = await checkbox({ required: true, @@ -706,11 +724,7 @@ async function runWizard() { section.disabled = !selectedSections.includes(levelsStr); - if (section.subsections) - setDisabledSections( - section.subsections, - levels.concat([section.permalink]) - ); + if (section.subsections) setDisabledSections(section.subsections, levels.concat([section.permalink])); }); } @@ -718,30 +732,15 @@ async function runWizard() { saveState(wizardState); - const flatListForDisplay = flattenOutline( - wizardState.generatedOutline.sections, - [], - true - ); + const flatListForDisplay = flattenOutline(wizardState.generatedOutline.sections, [], true); console.log("Selected outline: \n"); - console.log( - flatListForDisplay.map((section) => section.name).join("\n") + "\n" - ); + console.log(flatListForDisplay.map((section) => section.name).join("\n") + "\n"); - const outlineCopyForImprovements = JSON.parse( - JSON.stringify(wizardState.generatedOutline) - ); - outlineCopyForImprovements.sections = deleteDisabledSectionsAndClean( - outlineCopyForImprovements.sections - ); + const outlineCopyForImprovements = JSON.parse(JSON.stringify(wizardState.generatedOutline)); + outlineCopyForImprovements.sections = deleteDisabledSectionsAndClean(outlineCopyForImprovements.sections); - let regenerateOutlineInferenceMessages = - getOutlineRegenerationInferenceMessages( - 
outlineQuestions, - outlineCopyForImprovements, - ".".repeat(3000) - ); + let regenerateOutlineInferenceMessages = getOutlineRegenerationInferenceMessages(outlineQuestions, outlineCopyForImprovements, ".".repeat(3000)); if (!wizardState.outlineComments) wizardState.outlineComments = ""; @@ -754,17 +753,11 @@ async function runWizard() { }); if (newSections.trim()) { - const tempOutlineComments = - wizardState.outlineComments + "\n" + newSections; + const tempOutlineComments = wizardState.outlineComments + "\n" + newSections; saveState(wizardState); - regenerateOutlineInferenceMessages = - getOutlineRegenerationInferenceMessages( - outlineQuestions, - outlineCopyForImprovements, - tempOutlineComments - ); + regenerateOutlineInferenceMessages = getOutlineRegenerationInferenceMessages(outlineQuestions, outlineCopyForImprovements, tempOutlineComments); const newSectionsResponse = await runClaudeInference( regenerateOutlineInferenceMessages, @@ -804,49 +797,27 @@ async function runWizard() { } wizardState.addDiagrams = await confirm({ - message: - "Do you want to add diagrams, latex and flowcharts? (This works perfectly 98% of the time): ", + message: "Do you want to add diagrams, latex and flowcharts? (This works perfectly 98% of the time): ", default: wizardState.addDiagrams || true, transformer: (answer) => (answer ? "👍" : "👎") }); - function getPageWritingMessages( - overallOutline: Outline, - sections: OutlineSection[], - addDiagrams: boolean - ): ReadyToGeneratePage[] { + function getPageWritingMessages(overallOutline: Outline, sections: OutlineSection[], addDiagrams: boolean): ReadyToGeneratePage[] { return sections.flatMap((section) => { const sectionsReadyToGenerate: ReadyToGeneratePage = { section, levels: section.permalink.split(/(? 
- pageWritingMessages - .map((page) => getClaudeCosts(page.messages, 4096, model.model)) - .reduce((a, b) => a + b, 0) + pageWritingMessages.map((page) => getClaudeCosts(page.messages, 4096, model.model)).reduce((a, b) => a + b, 0) ); wizardState.pageGenerationModel = await select({ @@ -881,26 +846,19 @@ async function runWizard() { ...CLAUDE_MODELS.map((model, index) => ({ name: model.name, value: model.model, - description: `${model.pageDescription} (costs $${costs[index].toFixed( - 4 - )})` + description: `${model.pageDescription} (costs $${costs[index].toFixed(4)})` })), new Separator() ], - default: - wizardState.pageGenerationModel || - CLAUDE_MODELS[CLAUDE_MODELS.length - 1].model + default: wizardState.pageGenerationModel || CLAUDE_MODELS[CLAUDE_MODELS.length - 1].model }); saveState(wizardState); if (!wizardState.preferredRunnerForNextra) { wizardState.preferredRunnerForNextra = await select({ - message: - "Seems we haven't set up the scaffold yet. Which runner do you prefer? Bun would be fastest if you have it.", - choices: RUNNERS.filter((editor) => - isCommandAvailable(editor.command) - ).map((editor) => ({ + message: "Seems we haven't set up the scaffold yet. Which runner do you prefer? Bun would be fastest if you have it.", + choices: RUNNERS.filter((editor) => isCommandAvailable(editor.command)).map((editor) => ({ name: editor.name, value: editor.command })), @@ -922,8 +880,7 @@ async function runWizard() { wizardState.overwritePages = (fs.existsSync(path.join(docsFolder, "pages")) && (await confirm({ - message: - "There seem to already be a pages folder. Should we overwrite? ", + message: "There seem to already be a pages folder. Should we overwrite? ", default: wizardState.overwritePages || false, transformer: (answer) => (answer ? 
"👍" : "👎") }))) || @@ -935,9 +892,7 @@ async function runWizard() { idempotentlySetupNextraDocs( docsFolder, // biome-ignore lint/style/noNonNullAssertion: TS can't detect it but due to current code path we know this won't be null - RUNNERS.find( - (runner) => runner.command === wizardState.preferredRunnerForNextra - )!, + RUNNERS.find((runner) => runner.command === wizardState.preferredRunnerForNextra)!, wizardState ); } @@ -953,16 +908,9 @@ async function runWizard() { return; } - console.log( - "\n\nAnd we're off! If this helps do find https://github.com/hrishioa/lumentis and drop a star!\n\n" - ); + console.log("\n\nAnd we're off! If this helps do find https://github.com/hrishioa/lumentis and drop a star!\n\n"); - await generatePages( - true, - pageWritingMessages, - path.join(docsFolder, "pages"), - wizardState - ); + await generatePages(true, pageWritingMessages, path.join(docsFolder, "pages"), wizardState); } runWizard(); diff --git a/src/constants.ts b/src/constants.ts index 2aeb4f0..44ec3ee 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -7,6 +7,7 @@ export const lumentisFolderPath = path.join(process.cwd(), LUMENTIS_FOLDER); export const wizardStatePath = path.join(lumentisFolderPath, WIZARD_STATE_FILE); export const WRITING_STYLE_SIZE_LIMIT = 10000; +export const MAX_TOKEN_LIMIT = 200000; // TODO: do we want to drop this to allow for output token context? 
export const MAX_HEADING_CHAR_LENGTH = 50; export const NUMBER_OF_CHARACTERS_TO_FLUSH_TO_FILE = 200; diff --git a/src/folder-importing/utils.ts b/src/folder-importing/utils.ts new file mode 100644 index 0000000..69c82f2 --- /dev/null +++ b/src/folder-importing/utils.ts @@ -0,0 +1,1304 @@ +#!/usr/bin/env node +import fs from "node:fs"; +import path from "node:path"; +import { Worker } from "node:worker_threads"; +import { countTokens } from "@anthropic-ai/tokenizer"; +import dirTree from "directory-tree"; +import mime from "mime-types"; +import type { CheckboxInput } from "src/types"; +import { parsePlatformIndependentPath } from "../utils"; + +// ___________________________________FILE EXCLUSIONS AND INCLUSIONS SECTION___________________________________ +// Edit this section to include/exclude file types and folders from the folder tree. + +// I did a bunch of scrolling through mime types to come up with this list (https://www.iana.org/assignments/media-types/media-types.xhtml) +// However there's still possibly a bunch missing. +// Be aware that not all file types are readable with the fs.readFileSync +// we're currently using (e.g. Microsoft Word and Excel files). To add the ability +// to read these, you'll also need to add a special file reader for that file +// type.
+const readableMimeTypes = ["text", "message"]; +const readableApplicationSubtypes = ["json", "xml", "yaml", "rtf", "rtx"]; +// This is using the Programming_Languages_Extensions.json from ppisarczyk: https://gist.github.com/ppisarczyk/43962d06686722d26d176fad46879d41 +const allowed_extensions = [ + ".abap", + ".asc", + ".ash", + ".ampl", + ".mod", + ".g4", + ".apib", + ".apl", + ".dyalog", + ".asp", + ".asax", + ".ascx", + ".ashx", + ".asmx", + ".aspx", + ".axd", + ".dats", + ".hats", + ".sats", + ".as", + ".adb", + ".ada", + ".ads", + ".agda", + ".als", + ".apacheconf", + ".vhost", + ".cls", + ".applescript", + ".scpt", + ".arc", + ".ino", + ".asciidoc", + ".adoc", + ".asc", + ".aj", + ".asm", + ".a51", + ".inc", + ".nasm", + ".aug", + ".ahk", + ".ahkl", + ".au3", + ".awk", + ".auk", + ".gawk", + ".mawk", + ".nawk", + ".bat", + ".cmd", + ".befunge", + ".bison", + ".bb", + ".bb", + ".decls", + ".bmx", + ".bsv", + ".boo", + ".b", + ".bf", + ".brs", + ".bro", + ".c", + ".cats", + ".h", + ".idc", + ".w", + ".cs", + ".cake", + ".cshtml", + ".csx", + ".cpp", + ".c++", + ".cc", + ".cp", + ".cxx", + ".h", + ".h++", + ".hh", + ".hpp", + ".hxx", + ".inc", + ".inl", + ".ipp", + ".tcc", + ".tpp", + ".c-objdump", + ".chs", + ".clp", + ".cmake", + ".cmake.in", + ".cob", + ".cbl", + ".ccp", + ".cobol", + ".cpy", + ".css", + ".csv", + ".capnp", + ".mss", + ".ceylon", + ".chpl", + ".ch", + ".ck", + ".cirru", + ".clw", + ".icl", + ".dcl", + ".click", + ".clj", + ".boot", + ".cl2", + ".cljc", + ".cljs", + ".cljs.hl", + ".cljscm", + ".cljx", + ".hic", + ".coffee", + "._coffee", + ".cake", + ".cjsx", + ".cson", + ".iced", + ".cfm", + ".cfml", + ".cfc", + ".lisp", + ".asd", + ".cl", + ".l", + ".lsp", + ".ny", + ".podsl", + ".sexp", + ".cp", + ".cps", + ".cl", + ".coq", + ".v", + ".cppobjdump", + ".c++-objdump", + ".c++objdump", + ".cpp-objdump", + ".cxx-objdump", + ".creole", + ".cr", + ".feature", + ".cu", + ".cuh", + ".cy", + ".pyx", + ".pxd", + ".pxi", + ".d", + ".di", + 
".d-objdump", + ".com", + ".dm", + ".zone", + ".arpa", + ".d", + ".darcspatch", + ".dpatch", + ".dart", + ".diff", + ".patch", + ".dockerfile", + ".djs", + ".dylan", + ".dyl", + ".intr", + ".lid", + ".E", + ".ecl", + ".eclxml", + ".ecl", + ".sch", + ".brd", + ".epj", + ".e", + ".ex", + ".exs", + ".elm", + ".el", + ".emacs", + ".emacs.desktop", + ".em", + ".emberscript", + ".erl", + ".es", + ".escript", + ".hrl", + ".xrl", + ".yrl", + ".fs", + ".fsi", + ".fsx", + ".fx", + ".flux", + ".f90", + ".f", + ".f03", + ".f08", + ".f77", + ".f95", + ".for", + ".fpp", + ".factor", + ".fy", + ".fancypack", + ".fan", + ".fs", + ".for", + ".eam.fs", + ".fth", + ".4th", + ".f", + ".for", + ".forth", + ".fr", + ".frt", + ".fs", + ".ftl", + ".fr", + ".g", + ".gco", + ".gcode", + ".gms", + ".g", + ".gap", + ".gd", + ".gi", + ".tst", + ".s", + ".ms", + ".gd", + ".glsl", + ".fp", + ".frag", + ".frg", + ".fs", + ".fsh", + ".fshader", + ".geo", + ".geom", + ".glslv", + ".gshader", + ".shader", + ".vert", + ".vrx", + ".vsh", + ".vshader", + ".gml", + ".kid", + ".ebuild", + ".eclass", + ".po", + ".pot", + ".glf", + ".gp", + ".gnu", + ".gnuplot", + ".plot", + ".plt", + ".go", + ".golo", + ".gs", + ".gst", + ".gsx", + ".vark", + ".grace", + ".gradle", + ".gf", + ".gml", + ".graphql", + ".dot", + ".gv", + ".man", + ".l", + ".me", + ".ms", + ".n", + ".rno", + ".roff", + ".groovy", + ".grt", + ".gtpl", + ".gvy", + ".gsp", + ".hcl", + ".tf", + ".hlsl", + ".fx", + ".fxh", + ".hlsli", + ".html", + ".htm", + ".html.hl", + ".inc", + ".st", + ".xht", + ".xhtml", + ".mustache", + ".jinja", + ".eex", + ".erb", + ".erb.deface", + ".phtml", + ".http", + ".hh", + ".php", + ".haml", + ".haml.deface", + ".handlebars", + ".hbs", + ".hb", + ".hs", + ".hsc", + ".hx", + ".hxsl", + ".hy", + ".bf", + ".pro", + ".dlm", + ".ipf", + ".ini", + ".cfg", + ".prefs", + ".pro", + ".properties", + ".irclog", + ".weechatlog", + ".idr", + ".lidr", + ".ni", + ".i7x", + ".iss", + ".io", + ".ik", + ".thy", + ".ijs", + ".flex", 
+ ".jflex", + ".json", + ".geojson", + ".lock", + ".topojson", + ".json5", + ".jsonld", + ".jq", + ".jsx", + ".jade", + ".j", + ".java", + ".jsp", + ".js", + "._js", + ".bones", + ".es", + ".es6", + ".frag", + ".gs", + ".jake", + ".jsb", + ".jscad", + ".jsfl", + ".jsm", + ".jss", + ".njs", + ".pac", + ".sjs", + ".ssjs", + ".sublime-build", + ".sublime-commands", + ".sublime-completions", + ".sublime-keymap", + ".sublime-macro", + ".sublime-menu", + ".sublime-mousemap", + ".sublime-project", + ".sublime-settings", + ".sublime-theme", + ".sublime-workspace", + ".sublime_metrics", + ".sublime_session", + ".xsjs", + ".xsjslib", + ".jl", + ".ipynb", + ".krl", + ".sch", + ".brd", + ".kicad_pcb", + ".kit", + ".kt", + ".ktm", + ".kts", + ".lfe", + ".ll", + ".lol", + ".lsl", + ".lslp", + ".lvproj", + ".lasso", + ".las", + ".lasso8", + ".lasso9", + ".ldml", + ".latte", + ".lean", + ".hlean", + ".less", + ".l", + ".lex", + ".ly", + ".ily", + ".b", + ".m", + ".ld", + ".lds", + ".mod", + ".liquid", + ".lagda", + ".litcoffee", + ".lhs", + ".ls", + "._ls", + ".xm", + ".x", + ".xi", + ".lgt", + ".logtalk", + ".lookml", + ".ls", + ".lua", + ".fcgi", + ".nse", + ".pd_lua", + ".rbxs", + ".wlua", + ".mumps", + ".m", + ".m4", + ".m4", + ".ms", + ".mcr", + ".mtml", + ".muf", + ".m", + ".mak", + ".d", + ".mk", + ".mkfile", + ".mako", + ".mao", + ".md", + ".markdown", + ".mkd", + ".mkdn", + ".mkdown", + ".ron", + ".mask", + ".mathematica", + ".cdf", + ".m", + ".ma", + ".mt", + ".nb", + ".nbp", + ".wl", + ".wlt", + ".matlab", + ".m", + ".maxpat", + ".maxhelp", + ".maxproj", + ".mxt", + ".pat", + ".mediawiki", + ".wiki", + ".m", + ".moo", + ".metal", + ".minid", + ".druby", + ".duby", + ".mir", + ".mirah", + ".mo", + ".mod", + ".mms", + ".mmk", + ".monkey", + ".moo", + ".moon", + ".myt", + ".ncl", + ".nl", + ".nsi", + ".nsh", + ".n", + ".axs", + ".axi", + ".axs.erb", + ".axi.erb", + ".nlogo", + ".nl", + ".lisp", + ".lsp", + ".nginxconf", + ".vhost", + ".nim", + ".nimrod", + ".ninja", + 
".nit", + ".nix", + ".nu", + ".numpy", + ".numpyw", + ".numsc", + ".ml", + ".eliom", + ".eliomi", + ".ml4", + ".mli", + ".mll", + ".mly", + ".objdump", + ".m", + ".h", + ".mm", + ".j", + ".sj", + ".omgrofl", + ".opa", + ".opal", + ".cl", + ".opencl", + ".p", + ".cls", + ".scad", + ".org", + ".ox", + ".oxh", + ".oxo", + ".oxygene", + ".oz", + ".pwn", + ".inc", + ".php", + ".aw", + ".ctp", + ".fcgi", + ".inc", + ".php3", + ".php4", + ".php5", + ".phps", + ".phpt", + ".pls", + ".pck", + ".pkb", + ".pks", + ".plb", + ".plsql", + ".sql", + ".sql", + ".pov", + ".inc", + ".pan", + ".psc", + ".parrot", + ".pasm", + ".pir", + ".pas", + ".dfm", + ".dpr", + ".inc", + ".lpr", + ".pp", + ".pl", + ".al", + ".cgi", + ".fcgi", + ".perl", + ".ph", + ".plx", + ".pm", + ".pod", + ".psgi", + ".t", + ".6pl", + ".6pm", + ".nqp", + ".p6", + ".p6l", + ".p6m", + ".pl", + ".pl6", + ".pm", + ".pm6", + ".t", + ".pkl", + ".l", + ".pig", + ".pike", + ".pmod", + ".pod", + ".pogo", + ".pony", + ".ps", + ".eps", + ".ps1", + ".psd1", + ".psm1", + ".pde", + ".pl", + ".pro", + ".prolog", + ".yap", + ".spin", + ".proto", + ".asc", + ".pub", + ".pp", + ".pd", + ".pb", + ".pbi", + ".purs", + ".py", + ".bzl", + ".cgi", + ".fcgi", + ".gyp", + ".lmi", + ".pyde", + ".pyp", + ".pyt", + ".pyw", + ".rpy", + ".tac", + ".wsgi", + ".xpy", + ".pytb", + ".qml", + ".qbs", + ".pro", + ".pri", + ".r", + ".rd", + ".rsx", + ".raml", + ".rdoc", + ".rbbas", + ".rbfrm", + ".rbmnu", + ".rbres", + ".rbtbar", + ".rbuistate", + ".rhtml", + ".rmd", + ".rkt", + ".rktd", + ".rktl", + ".scrbl", + ".rl", + ".raw", + ".reb", + ".r", + ".r2", + ".r3", + ".rebol", + ".red", + ".reds", + ".cw", + ".rpy", + ".rs", + ".rsh", + ".robot", + ".rg", + ".rb", + ".builder", + ".fcgi", + ".gemspec", + ".god", + ".irbrc", + ".jbuilder", + ".mspec", + ".pluginspec", + ".podspec", + ".rabl", + ".rake", + ".rbuild", + ".rbw", + ".rbx", + ".ru", + ".ruby", + ".thor", + ".watchr", + ".rs", + ".rs.in", + ".sas", + ".scss", + ".smt2", + ".smt", + 
".sparql", + ".rq", + ".sqf", + ".hqf", + ".sql", + ".cql", + ".ddl", + ".inc", + ".prc", + ".tab", + ".udf", + ".viw", + ".sql", + ".db2", + ".ston", + ".svg", + ".sage", + ".sagews", + ".sls", + ".sass", + ".scala", + ".sbt", + ".sc", + ".scaml", + ".scm", + ".sld", + ".sls", + ".sps", + ".ss", + ".sci", + ".sce", + ".tst", + ".self", + ".sh", + ".bash", + ".bats", + ".cgi", + ".command", + ".fcgi", + ".ksh", + ".sh.in", + ".tmux", + ".tool", + ".zsh", + ".sh-session", + ".shen", + ".sl", + ".slim", + ".smali", + ".st", + ".cs", + ".tpl", + ".sp", + ".inc", + ".sma", + ".nut", + ".stan", + ".ML", + ".fun", + ".sig", + ".sml", + ".do", + ".ado", + ".doh", + ".ihlp", + ".mata", + ".matah", + ".sthlp", + ".styl", + ".sc", + ".scd", + ".swift", + ".sv", + ".svh", + ".vh", + ".toml", + ".txl", + ".tcl", + ".adp", + ".tm", + ".tcsh", + ".csh", + ".tex", + ".aux", + ".bbx", + ".bib", + ".cbx", + ".cls", + ".dtx", + ".ins", + ".lbx", + ".ltx", + ".mkii", + ".mkiv", + ".mkvi", + ".sty", + ".toc", + ".tea", + ".t", + ".txt", + ".fr", + ".nb", + ".ncl", + ".no", + ".textile", + ".thrift", + ".t", + ".tu", + ".ttl", + ".twig", + ".ts", + ".tsx", + ".upc", + ".anim", + ".asset", + ".mat", + ".meta", + ".prefab", + ".unity", + ".uno", + ".uc", + ".ur", + ".urs", + ".vcl", + ".vhdl", + ".vhd", + ".vhf", + ".vhi", + ".vho", + ".vhs", + ".vht", + ".vhw", + ".vala", + ".vapi", + ".v", + ".veo", + ".vim", + ".vb", + ".bas", + ".cls", + ".frm", + ".frx", + ".vba", + ".vbhtml", + ".vbs", + ".volt", + ".vue", + ".owl", + ".webidl", + ".x10", + ".xc", + ".xml", + ".ant", + ".axml", + ".ccxml", + ".clixml", + ".cproject", + ".csl", + ".csproj", + ".ct", + ".dita", + ".ditamap", + ".ditaval", + ".dll.config", + ".dotsettings", + ".filters", + ".fsproj", + ".fxml", + ".glade", + ".gml", + ".grxml", + ".iml", + ".ivy", + ".jelly", + ".jsproj", + ".kml", + ".launch", + ".mdpolicy", + ".mm", + ".mod", + ".mxml", + ".nproj", + ".nuspec", + ".odd", + ".osm", + ".plist", + ".pluginspec", + 
".props", + ".ps1xml", + ".psc1", + ".pt", + ".rdf", + ".rss", + ".scxml", + ".srdf", + ".storyboard", + ".stTheme", + ".sublime-snippet", + ".targets", + ".tmCommand", + ".tml", + ".tmLanguage", + ".tmPreferences", + ".tmSnippet", + ".tmTheme", + ".ts", + ".tsx", + ".ui", + ".urdf", + ".ux", + ".vbproj", + ".vcxproj", + ".vssettings", + ".vxml", + ".wsdl", + ".wsf", + ".wxi", + ".wxl", + ".wxs", + ".x3d", + ".xacro", + ".xaml", + ".xib", + ".xlf", + ".xliff", + ".xmi", + ".xml.dist", + ".xproj", + ".xsd", + ".xul", + ".zcml", + ".xsp-config", + ".xsp.metadata", + ".xpl", + ".xproc", + ".xquery", + ".xq", + ".xql", + ".xqm", + ".xqy", + ".xs", + ".xslt", + ".xsl", + ".xojo_code", + ".xojo_menu", + ".xojo_report", + ".xojo_script", + ".xojo_toolbar", + ".xojo_window", + ".xtend", + ".yml", + ".reek", + ".rviz", + ".sublime-syntax", + ".syntax", + ".yaml", + ".yaml-tmlanguage", + ".yang", + ".y", + ".yacc", + ".yy", + ".zep", + ".zimpl", + ".zmpl", + ".zpl", + ".desktop", + ".desktop.in", + ".ec", + ".eh", + ".edn", + ".fish", + ".mu", + ".nc", + ".ooc", + ".rst", + ".rest", + ".rest.txt", + ".rst.txt", + ".wisp", + ".prg", + ".ch", + ".prw", +]; + +// Extensions and paths to exclude from the folder tree. +// This is a list of extensions that are generally not readable by AI models. +// Please do add to it. We'll then do an additional file check on each file, +// but this should make those operations less heavy. 
const excludeExtensions = [
  ".pdf",
  ".mp4",
  ".jpg",
  ".jpeg",
  ".png",
  ".o",
  ".mov",
  ".mp3",
  ".mpg",
  ".mpeg",
  ".avi",
  ".wmv",
  ".doc",
  ".docx",
  ".xls",
  ".xlsx",
  ".ppt",
  ".pptx",
  ".exe",
  ".dll",
  ".so",
  ".dylib",
  ".a",
  ".lib",
  ".obj",
  ".pyc",
  ".class",
  ".jar",
  ".war",
  ".ear",
  ".zip",
  ".tar",
  ".class",
  ".jar",
  ".war",
  ".ear",
  ".zip",
  ".tar",
  ".gz",
  ".bz2",
  ".xz",
  ".7z",
  ".rar",
  ".tgz",
  ".tar.gz",
  ".tar.bz2",
  ".tar.xz",
  ".tar.7z",
  ".tar.Z",
  ".tar.lz",
  ".tar.lzma",
  ".tar.Z",
  ".tar.lz4",
  ".tar.lzop",
  ".tar.zst",
  ".tar.sz",
  ".tar.br",
  ".tar.bz",
  ".tar.lzo",
  ".gif",
  ".bmp",
  ".svg",
  ".webp",
  ".ico",
  ".tif",
  ".tiff",
  ".heif",
  ".heic",
  ".mkv",
  ".flv",
  ".webm",
  ".wav",
  ".aac",
  ".flac",
  ".ogg",
];

// Path patterns that are pruned from the folder tree regardless of extension.
const folderTreeExclusions = [
  /^.*\/\.bin$/,
  /^.*\/node_modules$/,
  /^.*\/\.vscode$/,
  /^.*\/\.git/,
  /^.*\/test$/,
  /^.*\/dist$/,
  /^.*\/build$/,
  /^.*\/out$/,
  /^.*\/target$/,
  /^.*\/venv$/,
  /^.*\/__pycache__$/,
  /^.*\/\.idea$/,
  /^.*\/\.DS_Store$/,
];

/**
 * Escapes every regular-expression metacharacter in `literal` so the result
 * matches the text verbatim when embedded in a `RegExp` source string.
 */
function escapeRegExp(literal: string): string {
  return literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * All exclusion patterns: the folder patterns above followed by one
 * "path ends with this extension" pattern per distinct excluded extension.
 *
 * The extension text is escaped before being compiled. Without escaping, the
 * leading "." is a wildcard, so ".a" would also reject "Main.java" and ".o"
 * would also reject "main.go". Duplicate entries in `excludeExtensions` are
 * collapsed so each pattern is compiled only once.
 */
export const allExclusions = folderTreeExclusions.concat(
  Array.from(new Set(excludeExtensions)).map(
    (ex) => new RegExp(`.*${escapeRegExp(ex)}$`)
  )
);

// ___________________________________CHECK FILE READABILITY___________________________________
// Prefer adjusting the restrictions through the lists above.

/**
 * Returns true when the text after the last "." in `filename`, prefixed with
 * ".", is an entry of `allowed_extensions`.
 *
 * Only the final dot-separated segment is compared (case-sensitively), so
 * multi-part entries in the allow-list (e.g. ".cmake.in") can never match.
 */
function checkIfFileIsProgrammingLanguage(filename: string) {
  const extension = "." + filename.split(".").pop();
  return allowed_extensions.includes(extension);
}

/**
 * Decides whether a file should be treated as readable text.
 *
 * @param filename - File name (or path) whose extension is inspected.
 * @returns true for recognised code extensions, for MIME top-level types in
 *   `readableMimeTypes`, and for `application/*` types whose subtype contains
 *   one of `readableApplicationSubtypes`; false otherwise, including when the
 *   MIME lookup yields nothing.
 */
export function checkFileIsReadable(filename: string) {
  // Allowable if it's a code file, since most LLMs are trained on large codebases.
  // Unfortunately the mime library doesn't infer most code extensions as text,
  // it just returns 'false'. So we have to check for programming languages separately.
  if (checkIfFileIsProgrammingLanguage(filename)) {
    return true;
  }
  const mimeType = mime.lookup(filename);
  if (!mimeType) {
    return false;
  }
  const [type, subtype] = mimeType.split("/");

  if (readableMimeTypes.includes(type)) {
    return true;
  } else if (
    type === "application" &&
    readableApplicationSubtypes.some((sub) => subtype.includes(sub))
  ) {
    return true;
  }
  return false;
}

// ___________________________________PROMPTING ADJUSTMENT__________________________________
// Adjust the AI prompt for folders here

// Text appended after each file's content.
const footerPromptString = "\n\n";
// Separator placed between consecutive files in the combined prompt.
const joinString = "\n\n____________________\n\n";

/**
 * Builds the text placed before a file's content in the combined prompt.
 *
 * NOTE(review): as visible here the template contains only a newline and does
 * not interpolate `filepath` — confirm against the original file.
 */
function getHeaderPromptString(filepath: string) {
  return `\n`;
}

/**
 * Counts the tokens contributed by the per-file header/footer/separator
 * wrapping alone (no file contents), for every entry whose display name does
 * not contain the folder marker "📁".
 *
 * @param flat_selection - Flattened checkbox entries; `value` is the path.
 * @returns Token count of the wrapping text as reported by `countTokens`.
 */
export function getAdditionalPromptTokens(
  flat_selection: { name: string; value: string }[]
) {
  const promptString = flat_selection
    .filter((file) => !file.name.includes("📁"))
    .map((file) => {
      return getHeaderPromptString(file.value) + footerPromptString;
    })
    .join(joinString);
  return countTokens(promptString);
}

/**
 * Reads every non-folder entry from disk as UTF-8 and concatenates the files
 * into one prompt string, each wrapped in the header/footer and separated by
 * `joinString`.
 *
 * @param flat_selection - Flattened checkbox entries; `value` is the path.
 * @returns The combined prompt text. Errors from `fs.readFileSync` propagate.
 */
export function combineFilesToString(
  flat_selection: { name: string; value: string; checked: boolean }[]
) {
  return flat_selection
    .filter((file) => !file.name.includes("📁")) // Faster but more fragile than 'fs.lstatSync(file.value).isFile()'
    .map((file) => {
      const header = getHeaderPromptString(file.value);
      const content = fs.readFileSync(
        parsePlatformIndependentPath(file.value),
        "utf-8"
      );
      return `${header}${content}${footerPromptString}`;
    })
    .join(joinString);
}

// ___________________________________WORKER THREADS___________________________________
// Run the major work as worker threads to avoid blocking the main thread and allow timing out.
// See files starting with `worker-` for the worker scripts.
+ +function createTimeoutPromise(time = 5000, value = "timeoutFailed") { + return new Promise((resolve, reject) => { + setTimeout(() => { + return resolve(value); + }, time); + }); +} + +function runWorker( + workerPath: string, + data: + | string + | dirTree.DirectoryTree + | { tree: dirTree.DirectoryTree; user_selection: string[] } +) { + const worker = new Worker(workerPath); + const promise = new Promise((resolve, reject) => { + worker.postMessage(data); + worker.on("message", resolve); + worker.on("error", reject); + worker.on("exit", (code) => { + if (code !== 0 && code !== 1) { + // Code 1 is used for manual termination + reject(new Error(`Worker stopped with exit code ${code}`)); + } + }); + }); + return { worker, promise }; +} + +export async function getFileTree( + filepath: string +): Promise { + const { worker, promise } = runWorker( + path.join(__dirname, "folder-importing", "worker-dirtree.cjs"), + filepath + ); + const timeout = createTimeoutPromise(5000); + const result = await Promise.race([promise, timeout]); + worker.terminate(); + if (result === "timeoutFailed") { + return "timeoutFailed"; + } else { + return result as dirTree.DirectoryTree; + } +} + +export async function removeExcludedFilesAndAddTokenCount( + tree: dirTree.DirectoryTree +): Promise< + | { result: boolean; tokenTotal: number; tree: dirTree.DirectoryTree } + | "timeoutFailed" +> { + const { worker, promise } = runWorker( + path.join(__dirname, "folder-importing", "worker-clean-dirtree.cjs"), + tree + ); + const timeout = createTimeoutPromise(3000); + const result = await Promise.race([promise, timeout]); + worker.terminate(); + if (result === "timeoutFailed") { + return "timeoutFailed"; + } else { + return result as { + result: boolean; + tokenTotal: number; + tree: dirTree.DirectoryTree; + }; + } +} + +export async function flattenFileTreeForCheckbox( + fileTree: dirTree.DirectoryTree +): Promise { + const { worker, promise } = runWorker( + path.join( + __dirname, + 
"folder-importing", + "worker-flatten-tree-for-checkbox.cjs" + ), + fileTree + ); + const timeout = createTimeoutPromise(2000); + const result = await Promise.race([promise, timeout]); + worker.terminate(); + if (result === "timeoutFailed") { + return "timeoutFailed"; + } else { + return result as CheckboxInput[]; + } +} + +export async function removeDeselectedItems( + tree: dirTree.DirectoryTree, + user_selection: string[] +): Promise< + | { result: boolean; tokenTotal: number; tree: dirTree.DirectoryTree } + | "timeoutFailed" +> { + const { worker, promise } = runWorker( + path.join(__dirname, "folder-importing", "worker-remove-deselected.cjs"), + { tree, user_selection } + ); + const timeout = createTimeoutPromise(2000); + const result = await Promise.race([promise, timeout]); + worker.terminate(); + if (result === "timeoutFailed") { + return "timeoutFailed"; + } else { + return result as { + result: boolean; + tokenTotal: number; + tree: dirTree.DirectoryTree; + }; + } +} diff --git a/src/folder-importing/worker-clean-dirtree.ts b/src/folder-importing/worker-clean-dirtree.ts new file mode 100644 index 0000000..f058f8a --- /dev/null +++ b/src/folder-importing/worker-clean-dirtree.ts @@ -0,0 +1,56 @@ +import fs from "node:fs"; +import { countTokens } from "@anthropic-ai/tokenizer"; +import dirTree from "directory-tree"; +import { parsePlatformIndependentPath } from "src/utils"; +import { checkFileIsReadable } from "./utils"; + +const { parentPort } = require("node:worker_threads"); + +let folderTokenTotal = 0; + +function recursivelyRemoveExcludedFilesAndAddTokenCount(tree: dirTree.DirectoryTree) { + tree.size = 0; + if (tree.children && tree.children.length > 0) { + tree.children = tree.children + .filter((child) => { + if (child.type === "file") { + return checkFileIsReadable(child.name); + } else if (child.type === "directory" && child.children && child.children.length > 0) { + recursivelyRemoveExcludedFilesAndAddTokenCount(child); + return 
child.children.length > 0; + } else { + return false; + } + }) + .sort((a, b) => (a.type === "file" && b.type !== "file" ? -1 : a.type !== "file" && b.type === "file" ? 1 : 0)); + for (const child of tree.children) { + if (child.type === "file") { + const fileTokens = countTokens(fs.readFileSync(parsePlatformIndependentPath(child.path), "utf-8")); // This gets expensive with large folders. User issue? + folderTokenTotal += fileTokens; + child.size = fileTokens; + } + } + } + if (tree.type === "file") { + console.log("Should not be here: recursivelyRemoveExcludedFilesAndAddTokenCount called on a file"); + return checkFileIsReadable(tree.name); + } else if (tree.type === "directory" && tree.children) { + if (tree.children.length > 0) { + tree.size = tree.children.reduce((acc, child) => acc + child.size, 0); + return true; + } else { + return false; // return if empty directory + } + } else { + return false; // return if type is questionable + } +} + +parentPort.on("message", (dir_tree: dirTree.DirectoryTree) => { + const result = recursivelyRemoveExcludedFilesAndAddTokenCount(dir_tree); + parentPort.postMessage({ + result: result, + tokenTotal: folderTokenTotal, + tree: dir_tree + }); +}); diff --git a/src/folder-importing/worker-dirtree.ts b/src/folder-importing/worker-dirtree.ts new file mode 100644 index 0000000..42d5a48 --- /dev/null +++ b/src/folder-importing/worker-dirtree.ts @@ -0,0 +1,18 @@ +const { parentPort } = require("node:worker_threads"); + +import dirTree from "directory-tree"; +import { allExclusions } from "./utils"; + +// Wrap dirTree as a Promise +function getFileTree(filepath: string): dirTree.DirectoryTree { + const tree = dirTree(filepath, { + exclude: allExclusions, + attributes: ["size", "type", "extension"] + }); + return tree; +} + +parentPort.on("message", (filepath: string) => { + const result = getFileTree(filepath); + parentPort.postMessage(result); +}); diff --git a/src/folder-importing/worker-flatten-tree-for-checkbox.ts 
b/src/folder-importing/worker-flatten-tree-for-checkbox.ts new file mode 100644 index 0000000..01b12b3 --- /dev/null +++ b/src/folder-importing/worker-flatten-tree-for-checkbox.ts @@ -0,0 +1,42 @@ +import fs from "node:fs"; +import dirTree from "directory-tree"; + +import type { CheckboxInput } from "src/types"; + +const { parentPort } = require("node:worker_threads"); + +function recursivelyFlattenFileTreeForCheckbox(fileTree: dirTree.DirectoryTree, levels = 0): CheckboxInput[] { + if (fileTree.type === "file") { + return [ + { + name: `${String(fileTree.size).padEnd(10, " ")}${"--".repeat(levels)}>${fileTree.name}`, + value: fileTree.path, + checked: true + } + ]; + } + + if (fileTree.type === "directory") { + let file_choices = [ + { + name: `${String(fileTree.size).padEnd(10, " ")}${"--".repeat(levels)}📁${fileTree.name}`, + value: fileTree.path, + checked: true + } + ]; + if (fileTree.children && fileTree.children.length > 0) { + file_choices = file_choices.concat( + fileTree.children.flatMap((child) => { + return recursivelyFlattenFileTreeForCheckbox(child, levels + 1); + }) + ); + } + return file_choices; + } + return []; +} + +parentPort.on("message", (fileTree: dirTree.DirectoryTree) => { + const result = recursivelyFlattenFileTreeForCheckbox(fileTree); + parentPort.postMessage(result); +}); diff --git a/src/folder-importing/worker-remove-deselected.ts b/src/folder-importing/worker-remove-deselected.ts new file mode 100644 index 0000000..617b52a --- /dev/null +++ b/src/folder-importing/worker-remove-deselected.ts @@ -0,0 +1,56 @@ +import fs from "node:fs"; +import { countTokens } from "@anthropic-ai/tokenizer"; +import dirTree from "directory-tree"; +import { parsePlatformIndependentPath } from "src/utils"; + +const { parentPort } = require("node:worker_threads"); + +let folderTokenTotal = 0; + +function recursivelyRemoveDeselectedItems(tree: dirTree.DirectoryTree, user_selection: string[]): boolean { + tree.size = 0; + + if (tree.children && 
tree.children.length > 0) { + tree.children = tree.children.filter((child) => { + if (child.type === "file") { + return user_selection.includes(child.path); + } else if (child.type === "directory" && child.children && child.children.length > 0) { + if (!user_selection.includes(child.path)) { + return false; + } + recursivelyRemoveDeselectedItems(child, user_selection); + return child.children.length > 0; + } else { + return false; + } + }); + for (const child of tree.children) { + if (child.type === "file") { + folderTokenTotal += child.size; + } + } + } + + if (tree.type === "file") { + console.log("Should not be here: recursivelyRemoveDeselectedItems called on a file"); + return user_selection.includes(tree.path); + } else if (tree.type === "directory" && tree.children) { + if (tree.children.length > 0) { + tree.size = tree.children.reduce((acc, child) => acc + child.size, 0); + return true; + } else { + return false; // return if empty directory + } + } else { + return false; // return if type is questionable + } +} + +parentPort.on("message", ({ tree, user_selection }: { tree: dirTree.DirectoryTree; user_selection: string[] }) => { + const result = recursivelyRemoveDeselectedItems(tree, user_selection); + parentPort.postMessage({ + result: result, + tokenTotal: folderTokenTotal, + tree: tree + }); +}); diff --git a/src/page-generator.ts b/src/page-generator.ts index 43d1d2b..06a2431 100644 --- a/src/page-generator.ts +++ b/src/page-generator.ts @@ -11,10 +11,7 @@ function writeConfigFiles(directory: string, wizardState: WizardState) { ? 
JSON.parse(fs.readFileSync(path.join(directory, "package.json"), "utf-8")) : {}; - console.log( - "Looking for package.json in ", - path.join(directory, "package.json") - ); + console.log("Looking for package.json in ", path.join(directory, "package.json")); packageJSON = { ...packageJSON, @@ -32,10 +29,7 @@ function writeConfigFiles(directory: string, wizardState: WizardState) { keywords: wizardState.coreThemes?.split(",").map((kw) => kw.trim()) || [] }; - fs.writeFileSync( - path.join(directory, "package.json"), - JSON.stringify(packageJSON, null, 2) - ); + fs.writeFileSync(path.join(directory, "package.json"), JSON.stringify(packageJSON, null, 2)); fs.writeFileSync( path.join(directory, "next.config.js"), @@ -157,11 +151,7 @@ Change things in \`pages\` to see the effect. } } -export function idempotentlySetupNextraDocs( - directory: string, - runner: (typeof RUNNERS)[number], - wizardState: WizardState -) { +export function idempotentlySetupNextraDocs(directory: string, runner: (typeof RUNNERS)[number], wizardState: WizardState) { // TODO: This might not be working? 
if (fs.existsSync(path.join(directory, "package.json"))) { console.log("Looks like project directory should be set up, skipping..."); @@ -169,13 +159,10 @@ export function idempotentlySetupNextraDocs( } try { - execSync( - `${runner.command} ${runner.installPrefix} react react-dom next nextra nextra-theme-docs typescript @types/node`, - { - cwd: directory, - stdio: "inherit" - } - ); + execSync(`${runner.command} ${runner.installPrefix} react react-dom next nextra nextra-theme-docs typescript @types/node`, { + cwd: directory, + stdio: "inherit" + }); } catch (err) { throw new Error(`Failed to install Requirements: ${err}`); } @@ -183,12 +170,7 @@ export function idempotentlySetupNextraDocs( writeConfigFiles(directory, wizardState); } -export async function generatePages( - startNextra: boolean, - pages: ReadyToGeneratePage[], - pagesFolder: string, - wizardState: WizardState -) { +export async function generatePages(startNextra: boolean, pages: ReadyToGeneratePage[], pagesFolder: string, wizardState: WizardState) { if (!fs.existsSync(pagesFolder)) { throw new Error(`Pages folder ${pagesFolder} does not exist`); } @@ -198,9 +180,7 @@ export async function generatePages( }); if (!preferredRunner) { - throw new Error( - `Preferred runner for \`nextra\` not found: ${wizardState.preferredRunnerForNextra}` - ); + throw new Error(`Preferred runner for \`nextra\` not found: ${wizardState.preferredRunnerForNextra}`); } if (startNextra) { @@ -235,36 +215,25 @@ export async function generatePages( fs.writeFileSync(path.join(pageFolder, "_meta.json"), JSON.stringify({})); } - const metaJSON = JSON.parse( - fs.readFileSync(path.join(pageFolder, "_meta.json"), "utf-8") - ); + const metaJSON = JSON.parse(fs.readFileSync(path.join(pageFolder, "_meta.json"), "utf-8")); if (!metaJSON[permalink]) { // TODO: Need this damn monkeypatch because Nextra doesn't // seem to support nested pages at the top level metaJSON[permalink] = page.section.title; if (i === 1) { - if ( - pages.find( - 
(p) => p.levels.length > 1 && p.levels[0] === pages[0].levels[0] - ) - ) { + if (pages.find((p) => p.levels.length > 1 && p.levels[0] === pages[0].levels[0])) { fs.writeFileSync( path.join(pagesFolder, "_meta.json"), JSON.stringify({ - ...JSON.parse( - fs.readFileSync(path.join(pagesFolder, "_meta.json"), "utf-8") - ), + ...JSON.parse(fs.readFileSync(path.join(pagesFolder, "_meta.json"), "utf-8")), [pages[0].levels[pages[0].levels.length - 1]]: "Basics" }) ); } } - fs.writeFileSync( - path.join(pageFolder, "_meta.json"), - JSON.stringify(metaJSON, null, 2) - ); + fs.writeFileSync(path.join(pageFolder, "_meta.json"), JSON.stringify(metaJSON, null, 2)); } const pagePath = path.join(pageFolder, permalink + ".mdx"); @@ -274,8 +243,7 @@ export async function generatePages( continue; } - if (!wizardState.pageGenerationModel) - throw new Error("No page generation model set"); + if (!wizardState.pageGenerationModel) throw new Error("No page generation model set"); await runClaudeInference( page.messages, diff --git a/src/prompts.ts b/src/prompts.ts index ce917a5..015f309 100644 --- a/src/prompts.ts +++ b/src/prompts.ts @@ -1,10 +1,7 @@ import type { MessageParam } from "@anthropic-ai/sdk/resources"; import { Outline, OutlineSection } from "./types"; -export function getTitleInferenceMessages( - primarySource: string, - description: string -): MessageParam[] { +export function getTitleInferenceMessages(primarySource: string, description: string): MessageParam[] { return [ // prettier-ignore { @@ -24,10 +21,7 @@ Please generate up to 10 possible names for documentation we want to build, for ]; } -export function getAudienceInferenceMessages( - primarySource: string, - description: string -): MessageParam[] { +export function getAudienceInferenceMessages(primarySource: string, description: string): MessageParam[] { return [ // prettier-ignore { @@ -47,9 +41,7 @@ Please generate up to 10 words describing the intended audience for creating doc ]; } -export function 
getThemeInferenceMessages( - primarySource: string -): MessageParam[] { +export function getThemeInferenceMessages(primarySource: string): MessageParam[] { return [ // prettier-ignore { @@ -67,9 +59,7 @@ Please generate up to 10 possible keywords referring to industries, technologies ]; } -export function getDescriptionInferenceMessages( - primarySource: string -): MessageParam[] { +export function getDescriptionInferenceMessages(primarySource: string): MessageParam[] { return [ // prettier-ignore { @@ -84,11 +74,7 @@ Please provide a three sentence description of the information in PrimarySource. ]; } -export function getQuestionsInferenceMessages( - primarySource: string, - description: string, - alreadyAnsweredQuestions?: string -): MessageParam[] { +export function getQuestionsInferenceMessages(primarySource: string, description: string, alreadyAnsweredQuestions?: string): MessageParam[] { return [ // prettier-ignore { @@ -218,14 +204,12 @@ type Outline = { const optionalWritingGuidelines = { diagramsAndLatex: { - guideline: - "Add mermaid diagrams in markdown (```mermaid) and latex (surrounded by $) when needed.", + guideline: "Add mermaid diagrams in markdown (```mermaid) and latex (surrounded by $) when needed.", index: 1 }, deeplyTechnical: { index: 3, - guideline: - "Only write about what is in PrimarySource, for the intended audience at their level of understanding about things they care about." + guideline: "Only write about what is in PrimarySource, for the intended audience at their level of understanding about things they care about." } }; @@ -259,14 +243,9 @@ export function getPageGenerationInferenceMessages( ): MessageParam[] { const actualWritingGuidelines = addDiagrams ? 
[ - ...writingGuidelines.slice( - 0, - optionalWritingGuidelines.diagramsAndLatex.index - ), + ...writingGuidelines.slice(0, optionalWritingGuidelines.diagramsAndLatex.index), optionalWritingGuidelines.diagramsAndLatex.guideline, - ...writingGuidelines.slice( - optionalWritingGuidelines.diagramsAndLatex.index - ) + ...writingGuidelines.slice(optionalWritingGuidelines.diagramsAndLatex.index) ] : writingGuidelines.slice(0, -1); @@ -279,16 +258,12 @@ export function getPageGenerationInferenceMessages( // prettier-ignore { role: "user", - content: `Now we're going to specifically write the section ${ - selectedSection.title - } (permalink: ${selectedSection.permalink}) in mdx, following these guidelines: + content: `Now we're going to specifically write the section ${selectedSection.title} (permalink: ${selectedSection.permalink}) in mdx, following these guidelines: ${actualWritingGuidelines.map((g, i) => `${i + 1}. ${g}`).join("\n")} ${ selectedSection.subsections - ? `${ - actualWritingGuidelines.length + 1 - }The subsections ${selectedSection.subsections + ? 
`${actualWritingGuidelines.length + 1}The subsections ${selectedSection.subsections .map((s) => s.title) .join(", ")} will be written later, and don't need to elaborated here.` : "" diff --git a/src/types.ts b/src/types.ts index 41f30d9..7d9ddf6 100644 --- a/src/types.ts +++ b/src/types.ts @@ -21,11 +21,18 @@ export type ReadyToGeneratePage = { messages: MessageParam[]; }; +export type CheckboxInput = { + name: string; + value: string; + checked: boolean; +}; + export type WizardState = Partial<{ gotDirectoryPermission: boolean; smarterModel: string; streamToConsole: boolean; - primarySourceFilename: string; + primarySourceAccessName: string; + primarySourceType: 'youtube' | 'file' | 'folder' | 'freetext'; loadedPrimarySource: string; anthropicKey: string; description: string; diff --git a/src/utils.ts b/src/utils.ts index 6c3273f..3521ee7 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -40,10 +40,7 @@ interface NonWhitespaceCharacter { index: number; } -function getNonWhitespaceCharacterOfStringAt( - s: string, - index: number -): NonWhitespaceCharacter { +function getNonWhitespaceCharacterOfStringAt(s: string, index: number): NonWhitespaceCharacter { let i = index; while (s[i].match(/\s/) !== null) { @@ -122,11 +119,7 @@ export function partialParse(str: string): any { lastKV !== "true" && lastKV !== "null" && lastKV.match(/^\d+$/) === null && - !( - lastKV.length !== 1 && - lastKV[0] === '"' && - lastKV[lastKV.length - 1] === '"' - ) + !(lastKV.length !== 1 && lastKV[0] === '"' && lastKV[lastKV.length - 1] === '"') ) { s = s.slice(0, j); } diff --git a/tsup.config.js b/tsup.config.js index b618c13..612a9fa 100644 --- a/tsup.config.js +++ b/tsup.config.js @@ -1,9 +1,16 @@ import { defineConfig } from "tsup"; export default defineConfig({ - entry: ["src/app.ts"], + entry: [ + "src/app.ts", + "src/folder-importing/worker-clean-dirtree.ts", + "src/folder-importing/worker-dirtree.ts", + "src/folder-importing/worker-flatten-tree-for-checkbox.ts", + 
"src/folder-importing/worker-remove-deselected.ts", + ], + outDir: "dist", publicDir: false, clean: true, minify: true, - format: ["cjs", "esm"] // 👈 Node + format: ["cjs"], // 👈 Node, });