Skip to content

Commit

Permalink
Vision Support Update (0.0.4.5) (#36)
Browse files Browse the repository at this point in the history
* Updated turbo. Bugs remain.

* Removed deprecated models from model list. Fixed bugs with icons.

* Added support for GPT-4-Turbo & text streaming.

* Update README.md

* Update README.md

* Added GPT-4o support.

* Mid-bugfix.

* Bugfix in progress.

* Debugged build with GPT-4o support. Note that the ChatGPT interface is a little bit buggy.

* Replaced GPT-4o with GPT-4-turbo as the default model.

* Added image uploading.

* Added gallery and multi-image support.

* Header fix & image support toggle (for models that don't support image input).

* Added image support for the ChatGPT interface.

* Quick ChatGPT no-selection bug fix.

* Added keyboard shortcuts.

* Added screenshot support for the Prompt-on-the-Fly interface.

* Fixed drag formatting. Note: Screenshots / image uploads don't work well when multiple popups are open.

* Fixed bug where images would be shared across multiple popups. Added escape-to-cancel for screenshots.

* Quick image-sharing bugfix.

* Fixed bug where image order would not be retained after some images are closed.

* Fixed bug where prompt-on-the-fly wouldn't incorporate selected text into the chatbox on an empty prompt.

* Cleaned up debug statements for feature release.

* Micro fix for the title in the popup; less cluttered.

* Better fix than the previous commit: use the user's last message as the title, with or without an image (content or content[0].text).

---------

Co-authored-by: giosilvi <[email protected]>
  • Loading branch information
wz-ml and giosilvi authored Jun 9, 2024
1 parent c6311a8 commit 7f531dd
Show file tree
Hide file tree
Showing 7 changed files with 595 additions and 89 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
4. Go to `chrome://extensions`. If you use Brave or another Chromium-based browser, replace the prefix with the name of your browser (e.g. `brave://extensions`).
5. Click "Load unpacked" and select your newly unzipped folder:


### Usage:
1. Make a profile at [OpenAI](https://beta.openai.com/), if you haven't already
2. From https://beta.openai.com/account/api-keys you can copy your API Key and paste into the chrome extension (we never have access to your API Key, it is stored on your Chrome profile).
Expand Down
61 changes: 55 additions & 6 deletions src/background.js
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ chrome.runtime.onInstalled.addListener(function (details) {

// Listen for a signal to refresh the context menu
chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
// console.log("Received message:", message);
// If the signal is to refresh the context menu
if (message.text === "newPromptList") {
createContextMenu();
Expand Down Expand Up @@ -221,6 +222,38 @@ chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
.then((resp) => sendResponse({ data: resp.text }))
.catch((error) => sendResponse({ error: error.message }));
return true; // Required to use sendResponse asynchronously
} else if (message.action === "takeScreenCapture"){
const { tab } = sender;
console.log("Taking screen capture!");
// Check for permissions
// Debug: Show permissions
chrome.permissions.getAll(function (permissions) {
// console.log("Permissions: ", permissions);
});
chrome.permissions.contains({ permissions: ["activeTab"] }, function (screenCapturePerms) {
if (screenCapturePerms){
// console.log("Perms available!");
chrome.tabs.captureVisibleTab(tab.windowId, { format: "png" }, function (dataUrl) {
sendResponse({ data: dataUrl });
});
}
else {
chrome.permissions.request({ permissions: ["activeTab"] }, function (granted) {
if (granted) {
console.log("Perms granted!");
chrome.tabs.captureVisibleTab(tab.windowId, { format: "png" }, function (dataUrl) {
sendResponse({ data: dataUrl });
});
}
else {
console.log("Perms rejected");
sendResponse({ error: "Permission denied" });
}
});
}
});

return true;
} else {
console.log("Unknown message: ", message);
}
Expand Down Expand Up @@ -264,10 +297,12 @@ chrome.tabs.onUpdated.addListener(function (tabId, changeInfo, tab) {
});

function replacePlaceHolder(selectionText) {
// console.log("Selection text:", selectionText);
// if there is a text /#TEXT#/g inside selectionText replace with nothing, and use the position to set the cursor later
if (typeof selectionText == "undefined") {
selectionText = "";
}
// console.log("Selection text:", selectionText);
var cursorPosition = selectionText.search(/#TEXT#/g);
if (cursorPosition !== -1) {
selectionText = selectionText.replace(/#TEXT#/g, "");
Expand All @@ -278,7 +313,21 @@ function replacePlaceHolder(selectionText) {
function launchPopUpInPage(selectionText, prompt, command) {
// replace the placeholder
if (command == "showPopUpOnTheFly") {
var [selectionText, cursorPosition] = replacePlaceHolder(selectionText);
if (selectionText){
// console.log(selectionText[0]);
// console.log(selectionText[0].content);
if (selectionText[0].content){
// Legacy: May be safe to remove.
let userSelection = selectionText[1] ? selectionText[1].content : selectionText[0].content;
var [selectionText, cursorPosition] = replacePlaceHolder(userSelection);
}
else {
var [selectionText, cursorPosition] = replacePlaceHolder(selectionText);
}
}
else{
var [selectionText, cursorPosition] = replacePlaceHolder(selectionText);
}
} else if (command == "showPopUpChatGPT") {
// loop over the selectionText and replace the placeholder
for (var i = 0; i < selectionText.length; i++) {
Expand Down Expand Up @@ -420,12 +469,12 @@ chrome.contextMenus.onClicked.addListener(async (info, tabs) => {
})();
});
} else {
if (prompt.model in CHAT_API_MODELS) {
console.log("Chat GPT", prompt);
launchPopUpInPage(prompt.prompt, prompt, "showPopUpChatGPT");
} else {
// if (prompt.model in CHAT_API_MODELS) {
// console.log("Chat GPT", prompt);
// launchPopUpInPage(prompt.prompt, prompt, "showPopUpChatGPT");
// } else {
launchPopUpInPage(prompt.prompt, prompt, "showPopUpOnTheFly");
}
// }
}
} else {
// If the prompt number is invalid, send an error message to the tab and log a message to the console
Expand Down
34 changes: 20 additions & 14 deletions src/content.js
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ function checkIdPopup(id) {

const buffers = {};

function handleDataChunk(uuid, dataChunk, request) {
function handleDataChunk(uuid, dataChunk, request, target_id) {
// Initialize buffer for this uuid if not already present
buffers[uuid] = buffers[uuid] || "";

Expand All @@ -159,33 +159,37 @@ function handleDataChunk(uuid, dataChunk, request) {
const completeJsonObjectStr = buffers[uuid].substring(0, endOfObjectPos + 1);

// Process the complete JSON object
processJsonObject(completeJsonObjectStr, uuid, request);
processJsonObject(completeJsonObjectStr, target_id, request);

// Remove the processed data from the buffer
buffers[uuid] = buffers[uuid].substring(endOfObjectPos + 2);
}
}
if (buffers[uuid].includes("[DONE]")) {
processJsonObject("[DONE]", uuid, request);
processJsonObject("[DONE]", target_id, request);
}
}
/**
 * Send a final update for the stream identified by `uuid`, with the
 * streaming flag set to false, so the popup stops rendering it.
 * NOTE(review): the only call site visible in this diff (sendStopSignal
 * inside processJsonObject) is commented out — confirm this is still used.
 * @param {object} request - The stream request/message being terminated.
 * @param {string} uuid - Identifier of the stream to stop.
 */
function sendStopSignal(request, uuid) {
    const stillStreaming = false; // final update: mark the stream as done
    console.log(`Sending stop signal for ${uuid}`);
    popUpShadow.updatepopup(request, uuid, stillStreaming);
}

function processJsonObject(jsonStr, uuid, request) {
function processJsonObject(jsonStr, target_id, request) {
// console.log("jsonStr:", jsonStr, uuid, request);
try {
// Check for the [DONE] marker
if (jsonStr === "[DONE]") {
popUpShadow.updatepopup(request, uuid, false);
popUpShadow.updatepopup(request, target_id, false);
return;
}

// Otherwise, parse and process the JSON object
// console.log("About to process JSON.");

// Remove newlines
jsonStr = jsonStr.replace(/\n/g, "");
const jsonObject = JSON.parse(jsonStr);

// console.log(`Processing JSON object for ${uuid}:`, jsonObject);

// Check for an error property in the JSON object
Expand All @@ -195,13 +199,14 @@ function processJsonObject(jsonStr, uuid, request) {
// return;
// }

popUpShadow.updatepopup(jsonObject, uuid, true); // Assuming uuid maps to idPopup
popUpShadow.updatepopup(jsonObject, target_id, true); // Assuming uuid maps to idPopup

// Once a valid JSON object has been processed, send a stop signal
// sendStopSignal(request,uuid);

} catch (e) {
console.error("Failed to parse JSON object:", e);
console.log(jsonStr);
}
}

Expand Down Expand Up @@ -249,16 +254,17 @@ chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
try {
// console.log("Request:", request);
// console.log(popUpShadow.stop_stream, popUpShadow.listOfUndesiredStreams);
if (popUpShadow.stop_stream && !popUpShadow.listOfUndesiredStreams.includes(request.uuid)) {
console.log("Stop stream with uuid", request.uuid);
popUpShadow.listOfUndesiredStreams.push(request.uuid);
delete buffers[request.uuid]; // Clear the buffer for this stream
popUpShadow.stop_stream = false;
popUpShadow.clearnewlines = true;
if (!popUpShadow.shadowRoot.getElementById(idPopup) || popUpShadow.stop_stream === idPopup) {
if (!popUpShadow.listOfUndesiredStreams.includes(request.uuid)){
console.log("Stop stream with uuid", request.uuid);
popUpShadow.listOfUndesiredStreams.push(request.uuid);
delete buffers[request.uuid]; // Clear the buffer for this stream
popUpShadow.stop_stream = false;
popUpShadow.clearnewlines = true;
}
}
if (!popUpShadow.listOfUndesiredStreams.includes(request.uuid)) {
handleDataChunk(request.uuid, request.text, request);
// processJsonObject(request.text,idPopup, request);
handleDataChunk(request.uuid, request.text, request, idPopup);
}
} catch (e) {
console.error(e);
Expand Down
69 changes: 53 additions & 16 deletions src/gpt3.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ export const CHAT_API_MODELS = {
"gpt-4o": true
};

export const VISION_SUPPORTED_MODELS = {
"gpt-4-turbo": true,
"gpt-4o": true
}

// For models that have a maximum token limit (input + output tokens per request).
var MaxTokensPerModel = {
"gpt-4o": 4000,
"gpt-4-turbo": 4096,
"gpt-4": 8000,
"gpt-3.5-turbo": 4000,
"gpt-3.5-turbo-instruct": 4000,
Expand All @@ -21,6 +25,18 @@ var MaxTokensPerModel = {
"text-ada-001": 2000
};

// Note: This is the number of maximum output tokens (not the context window size).
const MaxOutputTokensPerModel = {
"gpt-4o": 4000,
"gpt-4-turbo": 4096
}

const MaxInputTokensPerModel = {
"gpt-4o": 4000,
"gpt-4-turbo": 4096

}

const DECOUPLED_INPUT_OUTPUT_LENGTH_MODELS = {
"gpt-4-turbo": true,
"gpt-4o": true
Expand All @@ -30,12 +46,32 @@ function checkMaxTokens(content, model) {
var tokens = 0;
if (model in CHAT_API_MODELS) {
// check the tokens in the text, for each "content" key
// var content = JSON.parse(text);
// console.log("Original content:", content);
if (content[0].role === "user"){
// Request came from prompt-on-the-fly
if (content[0].content.length > 0 && content[0].content[0].type) {
content = [content[0].content[0].text];
// console.log("Cropping content", content);
}
else{
content = [content[0].content];
}
}
else{
// Request came from ChatGPT interface
let tmp = [];
for (var i = 0; i < content.length; i++) {
if (content[i].content.length > 0 && content[i].content[0].type) tmp.push(content[i].content[0].text);
else tmp.push(content[i].content);
}
content = tmp;
}

// Content should be a list of strings
for (var i = 0; i < content.length; i++) {
tokens += 4; // every message follows <im_start>{role/name}\n{content}<im_end>\n
var singleMsgToken = countTokens(content[i]["content"]);
var singleMsgToken = countTokens(content[i]);
tokens += singleMsgToken;
console.log(singleMsgToken, content[i]["content"]);
tokens += 2; // every reply is primed with <im_start>assistant
}
} else {
Expand All @@ -45,7 +81,6 @@ function checkMaxTokens(content, model) {
if (model in DECOUPLED_INPUT_OUTPUT_LENGTH_MODELS) {
maxTokens = MaxTokensPerModel[model];
}
console.log("model", model, "maxTokens", maxTokens, "tokens", tokens);
return { maxTokens, tokens };
}

Expand All @@ -59,9 +94,9 @@ function countTokens(text, model) {


function checkTabsAndSendStream(message, tabs, string, bodyData, idpopup, uuid, tokens_sent) {
if (typeof text === "object") {
text = text[text.length - 1]["content"];
}
if (typeof text === "object") {
text = text[text.length - 1]["content"];
}
if (tabs.id == -1) {
//pdf case
// console.log("pdf case");
Expand All @@ -76,24 +111,24 @@ function checkTabsAndSendStream(message, tabs, string, bodyData, idpopup, uuid,
}

function sendStream(message, id, string, bodyData, idpopup, uuid, tokens_sent = 0) {
chrome.tabs.sendMessage(id, {
let messageObj = {
message: message,
text: string,
bodyData: bodyData,
id_popup: idpopup,
uuid: uuid,
tokens_sent: tokens_sent,
}); //send the completion to the content script
};
chrome.tabs.sendMessage(id, messageObj); //send the completion to the content script
}

async function promptGPT3Prompting(prompt, items, tabs) {
var text = prompt["prompt"];
var model = prompt["model"];
// if the model is gpt-4 or gpt-3.5-turbo, we need to check that the text is a valid json
if (model in CHAT_API_MODELS) {
console.log('Check',typeof text)
if (typeof text !== "object")
{text = [{"role": "user", "content": text}];}
console.log('Check', typeof text)
if (typeof text !== "object") { text = [{ "role": "user", "content": text }]; }
}
else {
//we check that text is a string, if is JSON just take the last elemet value corresponding to the key "content"
Expand All @@ -106,7 +141,8 @@ async function promptGPT3Prompting(prompt, items, tabs) {
var uuid = Math.random().toString(36).substring(2, 15) + Math.random().toString(36).substring(2, 15);
//send immediately text to the content script
var { url, str_bodyData, bodyData, tokens } = chooseCompletion(model, temperature, text);
console.log("Debug1", url, str_bodyData, tokens);
let keepStreaming = true;

fetch(url, {
method: "POST",
headers: {
Expand All @@ -123,6 +159,7 @@ async function promptGPT3Prompting(prompt, items, tabs) {
return pump();

function pump() {

return reader.read().then(({ done, value }) => {
// When no more data needs to be consumed, close the stream
if (done) {
Expand All @@ -133,7 +170,7 @@ async function promptGPT3Prompting(prompt, items, tabs) {
var stream = new TextDecoder().decode(value); //.substring(6);
// console.log(string, typeof string);
// if tabs.id == -1 then use querySelector to get the tab
checkTabsAndSendStream("GPTStream_completion", tabs, stream, str_bodyData, popupID, uuid);
checkTabsAndSendStream("GPTStream_completion", tabs, stream, str_bodyData, popupID, uuid, null);
return pump();
});
}
Expand Down
4 changes: 2 additions & 2 deletions src/manifest.json
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
{
"name": "GPT-Prompter",
"version": "0.0.4.4",
"version": "0.0.4.5",
"description": "Fast custom prompts to GPT-3.5, GPT-4 and ChatGPT API",
"manifest_version": 3,
"icons": {
"16": "icons/iconA16.png",
"48": "icons/NewiconA48.png",
"128": "icons/NewiconA128.png"
},
"permissions": ["contextMenus", "storage", "commands"],
"permissions": ["contextMenus", "storage", "commands", "activeTab"],
"content_scripts": [
{
"matches": ["<all_urls>"],
Expand Down
4 changes: 2 additions & 2 deletions src/popup.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ function makePromptList(items) {
var modelText = document.createElement("span");
modelText.className = "feature-text";
modelText.innerText = items.customprompt[i]["model"];
modelText.setAttribute("data-title", "Model:");
modelText.setAttribute("data-title", "Model: ");

var promptText = document.createElement("span");
promptText.className = "prompt-text";
Expand Down Expand Up @@ -959,7 +959,7 @@ function newOrderFromID() {
[].forEach.call(listItens, function (item) {
list.push(item.id);
});
console.log("list", list);
// console.log("list", list);
return list;
}

Expand Down
Loading

0 comments on commit 7f531dd

Please sign in to comment.