Skip to content

Commit

Permalink
Vision Support Update (0.0.4.5) (#36)
Browse files Browse the repository at this point in the history
* Updated turbo. Bugs remain.

* Removed deprecated models from model list. Fixed bugs with icons.

* Added support for GPT-4-Turbo & text streaming.

* Update README.md

* Update README.md

* Added GPT-4o support.

* Mid-bugfix.

* Bugfix in progress.

* Debugged build with GPT-4o support. Note that the ChatGPT interface is a little bit buggy.

* Replaced GPT-4o with GPT-4-turbo as the default model.

* Added image uploading.

* Added gallery and multi-image support.

* Header fix & image support toggle (for models that don't support image input).

* Added image support for the ChatGPT interface.

* Quick ChatGPT no-selection bug fix.

* Added keyboard shortcuts.

* Added screenshot support for the Prompt-on-the-Fly interface.

* Fixed drag formatting. Note: Screenshots / image uploads don't work well when multiple popups are open.

* Fixed bug where images would be shared across multiple popups. Added escape-to-cancel for screenshots.

* Quick image-sharing bugfix.

* Fixed bug where image order would not be retained after some images are closed.

* Fixed bug where prompt-on-the-fly wouldn't incorporate selected text into the chatbox on an empty prompt.

* Cleaned up debug statements for feature release.

* Micro fix for the title in the popup; less cluttered.

* Better fix than the previous commit: use the user's last message as the title, with or without an image (content or content[0].text).

---------

Co-authored-by: giosilvi <[email protected]>
  • Loading branch information
wz-ml and giosilvi authored Jun 9, 2024
1 parent c6311a8 commit 7f531dd
Show file tree
Hide file tree
Showing 7 changed files with 595 additions and 89 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
4. Go to `chrome://extensions`. If you use Brave or another Chromium-based browser, replace the prefix with the name of your browser (e.g. `brave://extensions`).
5. Click "Load unpacked" and select your newly unzipped folder:


### Usage:
1. Make a profile at [OpenAI](https://beta.openai.com/), if you haven't already
2. From https://beta.openai.com/account/api-keys you can copy your API Key and paste into the chrome extension (we never have access to your API Key, it is stored on your Chrome profile).
Expand Down
61 changes: 55 additions & 6 deletions src/background.js
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ chrome.runtime.onInstalled.addListener(function (details) {

// Listen for a signal to refresh the context menu
chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
// console.log("Received message:", message);
// If the signal is to refresh the context menu
if (message.text === "newPromptList") {
createContextMenu();
Expand Down Expand Up @@ -221,6 +222,38 @@ chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
.then((resp) => sendResponse({ data: resp.text }))
.catch((error) => sendResponse({ error: error.message }));
return true; // Required to use sendResponse asynchronously
} else if (message.action === "takeScreenCapture"){
const { tab } = sender;
console.log("Taking screen capture!");
// Check for permissions
// Debug: Show permissions
chrome.permissions.getAll(function (permissions) {
// console.log("Permissions: ", permissions);
});
chrome.permissions.contains({ permissions: ["activeTab"] }, function (screenCapturePerms) {
if (screenCapturePerms){
// console.log("Perms available!");
chrome.tabs.captureVisibleTab(tab.windowId, { format: "png" }, function (dataUrl) {
sendResponse({ data: dataUrl });
});
}
else {
chrome.permissions.request({ permissions: ["activeTab"] }, function (granted) {
if (granted) {
console.log("Perms granted!");
chrome.tabs.captureVisibleTab(tab.windowId, { format: "png" }, function (dataUrl) {
sendResponse({ data: dataUrl });
});
}
else {
console.log("Perms rejected");
sendResponse({ error: "Permission denied" });
}
});
}
});

return true;
} else {
console.log("Unknown message: ", message);
}
Expand Down Expand Up @@ -264,10 +297,12 @@ chrome.tabs.onUpdated.addListener(function (tabId, changeInfo, tab) {
});

function replacePlaceHolder(selectionText) {
// console.log("Selection text:", selectionText);
// if there is a text /#TEXT#/g inside selectionText replace with nothing, and use the position to set the cursor later
if (typeof selectionText == "undefined") {
selectionText = "";
}
// console.log("Selection text:", selectionText);
var cursorPosition = selectionText.search(/#TEXT#/g);
if (cursorPosition !== -1) {
selectionText = selectionText.replace(/#TEXT#/g, "");
Expand All @@ -278,7 +313,21 @@ function replacePlaceHolder(selectionText) {
function launchPopUpInPage(selectionText, prompt, command) {
// replace the placeholder
if (command == "showPopUpOnTheFly") {
var [selectionText, cursorPosition] = replacePlaceHolder(selectionText);
if (selectionText){
// console.log(selectionText[0]);
// console.log(selectionText[0].content);
if (selectionText[0].content){
// Legacy: May be safe to remove.
let userSelection = selectionText[1] ? selectionText[1].content : selectionText[0].content;
var [selectionText, cursorPosition] = replacePlaceHolder(userSelection);
}
else {
var [selectionText, cursorPosition] = replacePlaceHolder(selectionText);
}
}
else{
var [selectionText, cursorPosition] = replacePlaceHolder(selectionText);
}
} else if (command == "showPopUpChatGPT") {
// loop over the selectionText and replace the placeholder
for (var i = 0; i < selectionText.length; i++) {
Expand Down Expand Up @@ -420,12 +469,12 @@ chrome.contextMenus.onClicked.addListener(async (info, tabs) => {
})();
});
} else {
if (prompt.model in CHAT_API_MODELS) {
console.log("Chat GPT", prompt);
launchPopUpInPage(prompt.prompt, prompt, "showPopUpChatGPT");
} else {
// if (prompt.model in CHAT_API_MODELS) {
// console.log("Chat GPT", prompt);
// launchPopUpInPage(prompt.prompt, prompt, "showPopUpChatGPT");
// } else {
launchPopUpInPage(prompt.prompt, prompt, "showPopUpOnTheFly");
}
// }
}
} else {
// If the prompt number is invalid, send an error message to the tab and log a message to the console
Expand Down
34 changes: 20 additions & 14 deletions src/content.js
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ function checkIdPopup(id) {

const buffers = {};

function handleDataChunk(uuid, dataChunk, request) {
function handleDataChunk(uuid, dataChunk, request, target_id) {
// Initialize buffer for this uuid if not already present
buffers[uuid] = buffers[uuid] || "";

Expand All @@ -159,33 +159,37 @@ function handleDataChunk(uuid, dataChunk, request) {
const completeJsonObjectStr = buffers[uuid].substring(0, endOfObjectPos + 1);

// Process the complete JSON object
processJsonObject(completeJsonObjectStr, uuid, request);
processJsonObject(completeJsonObjectStr, target_id, request);

// Remove the processed data from the buffer
buffers[uuid] = buffers[uuid].substring(endOfObjectPos + 2);
}
}
if (buffers[uuid].includes("[DONE]")) {
processJsonObject("[DONE]", uuid, request);
processJsonObject("[DONE]", target_id, request);
}
}
/**
 * Send a final update for the stream identified by `uuid`, with the
 * streaming flag set to false, so the popup stops rendering it.
 * NOTE(review): the only call site visible in this diff (sendStopSignal
 * inside processJsonObject) is commented out — confirm this is still used.
 * @param {object} request - The stream request/message being terminated.
 * @param {string} uuid - Identifier of the stream to stop.
 */
function sendStopSignal(request, uuid) {
    const stillStreaming = false; // final update: mark the stream as done
    console.log(`Sending stop signal for ${uuid}`);
    popUpShadow.updatepopup(request, uuid, stillStreaming);
}

function processJsonObject(jsonStr, uuid, request) {
function processJsonObject(jsonStr, target_id, request) {
// console.log("jsonStr:", jsonStr, uuid, request);
try {
// Check for the [DONE] marker
if (jsonStr === "[DONE]") {
popUpShadow.updatepopup(request, uuid, false);
popUpShadow.updatepopup(request, target_id, false);
return;
}

// Otherwise, parse and process the JSON object
// console.log("About to process JSON.");

// Remove newlines
jsonStr = jsonStr.replace(/\n/g, "");
const jsonObject = JSON.parse(jsonStr);

// console.log(`Processing JSON object for ${uuid}:`, jsonObject);

// Check for an error property in the JSON object
Expand All @@ -195,13 +199,14 @@ function processJsonObject(jsonStr, uuid, request) {
// return;
// }

popUpShadow.updatepopup(jsonObject, uuid, true); // Assuming uuid maps to idPopup
popUpShadow.updatepopup(jsonObject, target_id, true); // Assuming uuid maps to idPopup

// Once a valid JSON object has been processed, send a stop signal
// sendStopSignal(request,uuid);

} catch (e) {
console.error("Failed to parse JSON object:", e);
console.log(jsonStr);
}
}

Expand Down Expand Up @@ -249,16 +254,17 @@ chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
try {
// console.log("Request:", request);
// console.log(popUpShadow.stop_stream, popUpShadow.listOfUndesiredStreams);
if (popUpShadow.stop_stream && !popUpShadow.listOfUndesiredStreams.includes(request.uuid)) {
console.log("Stop stream with uuid", request.uuid);
popUpShadow.listOfUndesiredStreams.push(request.uuid);
delete buffers[request.uuid]; // Clear the buffer for this stream
popUpShadow.stop_stream = false;
popUpShadow.clearnewlines = true;
if (!popUpShadow.shadowRoot.getElementById(idPopup) || popUpShadow.stop_stream === idPopup) {
if (!popUpShadow.listOfUndesiredStreams.includes(request.uuid)){
console.log("Stop stream with uuid", request.uuid);
popUpShadow.listOfUndesiredStreams.push(request.uuid);
delete buffers[request.uuid]; // Clear the buffer for this stream
popUpShadow.stop_stream = false;
popUpShadow.clearnewlines = true;
}
}
if (!popUpShadow.listOfUndesiredStreams.includes(request.uuid)) {
handleDataChunk(request.uuid, request.text, request);
// processJsonObject(request.text,idPopup, request);
handleDataChunk(request.uuid, request.text, request, idPopup);
}
} catch (e) {
console.error(e);
Expand Down
69 changes: 53 additions & 16 deletions src/gpt3.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ export const CHAT_API_MODELS = {
"gpt-4o": true
};

export const VISION_SUPPORTED_MODELS = {
"gpt-4-turbo": true,
"gpt-4o": true
}

// For models that have a maximum token limit (input + output tokens per request).
var MaxTokensPerModel = {
"gpt-4o": 4000,
"gpt-4-turbo": 4096,
"gpt-4": 8000,
"gpt-3.5-turbo": 4000,
"gpt-3.5-turbo-instruct": 4000,
Expand All @@ -21,6 +25,18 @@ var MaxTokensPerModel = {
"text-ada-001": 2000
};

// Note: This is the number of maximum output tokens (not the context window size).
const MaxOutputTokensPerModel = {
"gpt-4o": 4000,
"gpt-4-turbo": 4096
}

const MaxInputTokensPerModel = {
"gpt-4o": 4000,
"gpt-4-turbo": 4096

}

const DECOUPLED_INPUT_OUTPUT_LENGTH_MODELS = {
"gpt-4-turbo": true,
"gpt-4o": true
Expand All @@ -30,12 +46,32 @@ function checkMaxTokens(content, model) {
var tokens = 0;
if (model in CHAT_API_MODELS) {
// check the tokens in the text, for each "content" key
// var content = JSON.parse(text);
// console.log("Original content:", content);
if (content[0].role === "user"){
// Request came from prompt-on-the-fly
if (content[0].content.length > 0 && content[0].content[0].type) {
content = [content[0].content[0].text];
// console.log("Cropping content", content);
}
else{
content = [content[0].content];
}
}
else{
// Request came from ChatGPT interface
let tmp = [];
for (var i = 0; i < content.length; i++) {
if (content[i].content.length > 0 && content[i].content[0].type) tmp.push(content[i].content[0].text);
else tmp.push(content[i].content);
}
content = tmp;
}

// Content should be a list of strings
for (var i = 0; i < content.length; i++) {
tokens += 4; // every message follows <im_start>{role/name}\n{content}<im_end>\n
var singleMsgToken = countTokens(content[i]["content"]);
var singleMsgToken = countTokens(content[i]);
tokens += singleMsgToken;
console.log(singleMsgToken, content[i]["content"]);
tokens += 2; // every reply is primed with <im_start>assistant
}
} else {
Expand All @@ -45,7 +81,6 @@ function checkMaxTokens(content, model) {
if (model in DECOUPLED_INPUT_OUTPUT_LENGTH_MODELS) {
maxTokens = MaxTokensPerModel[model];
}
console.log("model", model, "maxTokens", maxTokens, "tokens", tokens);
return { maxTokens, tokens };
}

Expand All @@ -59,9 +94,9 @@ function countTokens(text, model) {


function checkTabsAndSendStream(message, tabs, string, bodyData, idpopup, uuid, tokens_sent) {
if (typeof text === "object") {
text = text[text.length - 1]["content"];
}
if (typeof text === "object") {
text = text[text.length - 1]["content"];
}
if (tabs.id == -1) {
//pdf case
// console.log("pdf case");
Expand All @@ -76,24 +111,24 @@ function checkTabsAndSendStream(message, tabs, string, bodyData, idpopup, uuid,
}

function sendStream(message, id, string, bodyData, idpopup, uuid, tokens_sent = 0) {
chrome.tabs.sendMessage(id, {
let messageObj = {
message: message,
text: string,
bodyData: bodyData,
id_popup: idpopup,
uuid: uuid,
tokens_sent: tokens_sent,
}); //send the completion to the content script
};
chrome.tabs.sendMessage(id, messageObj); //send the completion to the content script
}

async function promptGPT3Prompting(prompt, items, tabs) {
var text = prompt["prompt"];
var model = prompt["model"];
// if the model is gpt-4 or gpt-3.5-turbo, we need to check that the text is a valid json
if (model in CHAT_API_MODELS) {
console.log('Check',typeof text)
if (typeof text !== "object")
{text = [{"role": "user", "content": text}];}
console.log('Check', typeof text)
if (typeof text !== "object") { text = [{ "role": "user", "content": text }]; }
}
else {
//we check that text is a string, if is JSON just take the last elemet value corresponding to the key "content"
Expand All @@ -106,7 +141,8 @@ async function promptGPT3Prompting(prompt, items, tabs) {
var uuid = Math.random().toString(36).substring(2, 15) + Math.random().toString(36).substring(2, 15);
//send immediately text to the content script
var { url, str_bodyData, bodyData, tokens } = chooseCompletion(model, temperature, text);
console.log("Debug1", url, str_bodyData, tokens);
let keepStreaming = true;

fetch(url, {
method: "POST",
headers: {
Expand All @@ -123,6 +159,7 @@ async function promptGPT3Prompting(prompt, items, tabs) {
return pump();

function pump() {

return reader.read().then(({ done, value }) => {
// When no more data needs to be consumed, close the stream
if (done) {
Expand All @@ -133,7 +170,7 @@ async function promptGPT3Prompting(prompt, items, tabs) {
var stream = new TextDecoder().decode(value); //.substring(6);
// console.log(string, typeof string);
// if tabs.id == -1 then use querySelector to get the tab
checkTabsAndSendStream("GPTStream_completion", tabs, stream, str_bodyData, popupID, uuid);
checkTabsAndSendStream("GPTStream_completion", tabs, stream, str_bodyData, popupID, uuid, null);
return pump();
});
}
Expand Down
4 changes: 2 additions & 2 deletions src/manifest.json
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
{
"name": "GPT-Prompter",
"version": "0.0.4.4",
"version": "0.0.4.5",
"description": "Fast custom prompts to GPT-3.5, GPT-4 and ChatGPT API",
"manifest_version": 3,
"icons": {
"16": "icons/iconA16.png",
"48": "icons/NewiconA48.png",
"128": "icons/NewiconA128.png"
},
"permissions": ["contextMenus", "storage", "commands"],
"permissions": ["contextMenus", "storage", "commands", "activeTab"],
"content_scripts": [
{
"matches": ["<all_urls>"],
Expand Down
4 changes: 2 additions & 2 deletions src/popup.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ function makePromptList(items) {
var modelText = document.createElement("span");
modelText.className = "feature-text";
modelText.innerText = items.customprompt[i]["model"];
modelText.setAttribute("data-title", "Model:");
modelText.setAttribute("data-title", "Model: ");

var promptText = document.createElement("span");
promptText.className = "prompt-text";
Expand Down Expand Up @@ -959,7 +959,7 @@ function newOrderFromID() {
[].forEach.call(listItens, function (item) {
list.push(item.id);
});
console.log("list", list);
// console.log("list", list);
return list;
}

Expand Down
Loading

0 comments on commit 7f531dd

Please sign in to comment.