
Commit 2c073ab

committed:

1. Fixed memory leak issues in multi-session scenarios.

1 parent aadbddc · commit 2c073ab

2 files changed: +56 −39 lines

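The leak pattern both files share: every AIlice session constructed its own `ASpeech` / `AStorageVecDB` instance, and each instance held its own speech and embedding models as instance attributes, so N concurrent sessions pinned N copies of heavyweight models in memory. The commit promotes those attributes to lazily initialized module-level globals that all sessions share. A minimal sketch of the before/after pattern, assuming a generic heavyweight loader (all names below are hypothetical, not from the AIlice codebase):

```python
# Sketch of the pattern this commit applies; names are hypothetical.

def load_heavy_model():
    # Stand-in for an expensive load (Whisper, ChatTTS, a GGUF embedder...).
    return object()

# Before: one model copy per session object, so N live sessions keep
# N copies alive for as long as the sessions exist.
class SessionBefore:
    def __init__(self):
        self.model = load_heavy_model()

# After: a single lazily created module-level copy shared by all sessions.
_model = None

class SessionAfter:
    def PrepareModel(self):
        global _model
        if _model is None:              # load only on first use
            _model = load_heavy_model()
        return _model
```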

ailice/modules/ASpeech.py

Lines changed: 24 additions & 12 deletions
```diff
@@ -14,12 +14,13 @@ def strip(txt: str) -> str:
     translation_table = str.maketrans("", "", string.whitespace)
     return txt.translate(translation_table)
 
+t2s = None
+s2t = None
+
 class ASpeech():
     def __init__(self):
         self.textQue = queue.Queue(maxsize=100)
         self.audioQue = queue.Queue(maxsize=100)
-        self.t2s = None
-        self.s2t = None
 
         self.inputDone = True
         self.lock = threading.Lock()
@@ -39,30 +40,39 @@ def ModuleInfo(self):
                 "SWITCH-TONE": {"func": "SwitchTone", "prompt": "Switch the TTS system to a new tone.", "type": "primary"}}}
 
     def PrepareModel(self):
-        if None in [self.t2s, self.s2t]:
-            self.t2s = T2S_ChatTTS()
-            self.s2t = S2T_WhisperLarge()
+        global s2t, t2s
+
+        if None in [t2s, s2t]:
+            t2s = T2S_ChatTTS()
+            s2t = S2T_WhisperLarge()
         return
 
     def SetDevices(self, deviceMap: dict[str,str]):
+        global s2t, t2s
+
         if "stt" in deviceMap:
-            self.s2t.To(deviceMap['stt'])
+            s2t.To(deviceMap['stt'])
         elif "tts" in deviceMap:
-            self.t2s.To(deviceMap['tts'])
+            t2s.To(deviceMap['tts'])
         return
 
     def Speech2Text(self, wav: np.ndarray, sr: int) -> str:
-        return self.s2t.recognize(audio_data_to_numpy((wav, sr)))
+        global s2t
+        return s2t.recognize(audio_data_to_numpy((wav, sr)))
 
     def Text2Speech(self, txt: str) -> tuple[np.ndarray, int]:
+        global t2s
+
         if (None == txt) or ("" == strip(txt)):
             return (np.zeros(1), 24000)
-        return self.t2s(txt)
+        return t2s(txt)
 
     def GetAudio(self) -> str:
+        global s2t
+
         self.inputDone = True
         with self.lock:
-            ret = self.s2t()
+            ret = s2t()
         return ret
 
     def Speak(self, txt: str):
@@ -74,15 +84,17 @@ def Speak(self, txt: str):
         return
 
     def SwitchTone(self) -> str:
-        return self.t2s.SwitchTone()
+        global t2s
+        return t2s.SwitchTone()
 
     def ProcessText(self):
+        global t2s
         while True:
             #The inter-thread synchronization issue here is more complex than it appears.
             self.noTextLeft = (self.inputDone and self.textQue.empty())
             text = self.textQue.get()
             try:
-                self.audioQue.put(self.t2s(text))
+                self.audioQue.put(t2s(text))
             except Exception as e:
                 print('EXCEPTION in ProcessText(). continue. e: ',str(e))
                 continue
```
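Note the added `global s2t, t2s` declarations: Python only requires them where a module-level name is rebound, as in `PrepareModel`. Methods that merely call the shared objects, such as `Speech2Text` or `GetAudio`, would resolve the names anyway, so those extra declarations are redundant but harmless documentation of the shared state. A toy illustration of that scoping rule (hypothetical names):

```python
# Toy illustration of Python's `global` scoping rule (hypothetical names).

value = None

def read_only():
    return value        # no declaration needed: reads fall back to module scope

def rebind():
    global value        # required: plain assignment would otherwise create
    value = 42          # a new function-local variable instead

rebind()
assert read_only() == 42
```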

ailice/modules/AStorageVecDB.py

Lines changed: 32 additions & 27 deletions
```diff
@@ -22,10 +22,11 @@
 MODEL = 'nomic-ai/nomic-embed-text-v1-GGUF'
 FILE_NAME = 'nomic-embed-text-v1.Q8_0.gguf'
 
+model = None
+modelLock = Lock()
+
 class AStorageVecDB():
     def __init__(self):
-        self.model = None
-        self.modelLock = Lock()
         self.data = {"model": MODEL, "file": FILE_NAME, "collections": {}}
         self.dir = None
         self.buffers = {}
@@ -38,8 +39,9 @@ def ModuleInfo(self):
         return {"NAME": "storage", "ACTIONS": {}}
 
     def CalcEmbeddings(self, txts: list[str]):
-        with self.modelLock:
-            return np.array(self.model.embed(txts))
+        global model, modelLock
+        with modelLock:
+            return np.array(model.embed(txts))
 
     def Hippocampus(self):
         while True:
@@ -73,30 +75,33 @@ def Load(self, dir):
         return
 
     def PrepareModel(self) -> str:
+        global model, modelLock
+
         ggufFile = hf_hub_download(repo_id=self.data['model'],filename=self.data['file'])
-        if self.model and ggufFile == self.model.model_path:
-            return f"Embedding model {self.model} has already been loaded."
-
-        if "llama_cpp" == INFERENCE_ENGINE:
-            self.model = Llama(
-                model_path=ggufFile,
-                embedding=True,
-                n_gpu_layers=-1, # Uncomment to use GPU acceleration
-                # seed=1337, # Uncomment to set a specific seed
-                # n_ctx=2048, # Uncomment to increase the context window
-            )
-            return "Embedding model has been loaded."
-        elif "gpt4all" == INFERENCE_ENGINE:
-            gpus = []
-            try:
-                gpus = GPT4All.list_gpus()
-                device = gpus[0] if len(gpus) > 0 else "cpu"
-            except Exception as e:
-                device = "cpu"
-            self.model = Embed4All(ggufFile, device = device)
-            return f"GPUs found on this device: {gpus}. Embedding model has been loaded on {device}."
-        else:
-            return "No inference engine was found. Please use one of the following commands to install: `pip install gpt4all` or `ailice_turbo`."
+        with modelLock:
+            if model and ggufFile == model.model_path:
+                return f"Embedding model {self.data['model']} has already been loaded."
+
+            if "llama_cpp" == INFERENCE_ENGINE:
+                model = Llama(
+                    model_path=ggufFile,
+                    embedding=True,
+                    n_gpu_layers=-1, # Uncomment to use GPU acceleration
+                    # seed=1337, # Uncomment to set a specific seed
+                    # n_ctx=2048, # Uncomment to increase the context window
+                )
+                return "Embedding model has been loaded."
+            elif "gpt4all" == INFERENCE_ENGINE:
+                gpus = []
+                try:
+                    gpus = GPT4All.list_gpus()
+                    device = gpus[0] if len(gpus) > 0 else "cpu"
+                except Exception as e:
+                    device = "cpu"
+                model = Embed4All(ggufFile, device = device)
+                return f"GPUs found on this device: {gpus}. Embedding model has been loaded on {device}."
+            else:
+                return "No inference engine was found. Please use one of the following commands to install: `pip install gpt4all` or `ailice_turbo`."
 
     def Open(self, directory: str) -> str:
         try:
```
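Unlike `ASpeech.PrepareModel`, the storage version performs both the already-loaded check and the load itself under `with modelLock:`, so two sessions racing into `PrepareModel` cannot construct the embedding model twice. A minimal sketch of that locked lazy-initialization pattern (hypothetical names):

```python
# Sketch of locked lazy initialization: the check and the load happen
# under one lock, so concurrent callers get exactly one shared instance.
from threading import Lock

model = None
modelLock = Lock()

def prepare_model():
    global model
    with modelLock:
        if model is None:       # checked under the lock: at most one load
            model = object()    # stand-in for Llama(...) / Embed4All(...)
        return model
```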
