From 1caa7739f8ae79207b01dfa2e905b7f7ec77d0c6 Mon Sep 17 00:00:00 2001
From: kashi-x
Date: Mon, 13 May 2024 04:17:52 +0900
Subject: [PATCH] add init

---
 README.md                                    |   1 -
 README_jp.md                                 |  12 +
 src/emo.py                                   |  50 ----
 src/emo_trial.py                             |  44 ----
 src/f.py                                     | 105 ---------
 src/fast.py                                  | 110 ---------
 src/fast2.py                                 | 120 ----------
 src/fast3.py                                 | 137 -----------
 src/fast4.py                                 | 133 -----------
 src/fast5.py                                 | 126 ----------
 src/fast6.py                                 | 113 ---------
 src/fast7.py                                 | 148 ------------
 src/fast8.py                                 | 125 ----------
 src/fast9.py                                 | 229 -------------------
 dev/b.py => src/fast_voice2word.py           |   0
 src/foo.py                                   |  38 ---
 dev/a.py => src/word2emotion_and_plotting.py |  23 +-
 17 files changed, 27 insertions(+), 1487 deletions(-)
 delete mode 100644 README.md
 create mode 100644 README_jp.md
 delete mode 100644 src/emo.py
 delete mode 100644 src/emo_trial.py
 delete mode 100644 src/f.py
 delete mode 100644 src/fast.py
 delete mode 100644 src/fast2.py
 delete mode 100644 src/fast3.py
 delete mode 100644 src/fast4.py
 delete mode 100644 src/fast5.py
 delete mode 100644 src/fast6.py
 delete mode 100644 src/fast7.py
 delete mode 100644 src/fast8.py
 delete mode 100644 src/fast9.py
 rename dev/b.py => src/fast_voice2word.py (100%)
 delete mode 100644 src/foo.py
 rename dev/a.py => src/word2emotion_and_plotting.py (88%)

diff --git a/README.md b/README.md
deleted file mode 100644
index 6f7a1df..0000000
--- a/README.md
+++ /dev/null
@@ -1 +0,0 @@
-# fast_word_emotion_analysis
\ No newline at end of file
diff --git a/README_jp.md b/README_jp.md
new file mode 100644
index 0000000..06af717
--- /dev/null
+++ b/README_jp.md
@@ -0,0 +1,12 @@
+# fast_word_emotion_analysis
+
+Takes audio input and, at each syllable break, computes the emotion of the speech heard so far.
+Via an Arduino, it sends a current to the neck of someone who is not listening (or cannot hear), making them perform an appropriate nod that matches the detected emotion.
+The Arduino code... exists only on the actual device.
+
+How to run
+
+Run the two scripts inside src. Both run their processing asynchronously.
+(Each passes its analysis results to the other through file I/O. The PC supplied for the demo was underpowered, so the two processes are synchronized via file I/O, which makes it possible to offload part of the processing to a phone at hand.)
+The first run takes a while because the Whisper and BERT models have to be downloaded.
+CUDA and the audio input also have to be managed, so there are many environment-dependent details that need to be adjusted.
diff --git a/src/emo.py b/src/emo.py
deleted file mode 100644
index ef21bfd..0000000
--- a/src/emo.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from transformers import pipeline, AutoModelForSequenceClassification, BertJapaneseTokenizer
-import numpy as np
-import matplotlib.pyplot as plt
-
-
-model = AutoModelForSequenceClassification.from_pretrained(
-    "patrickramos/bert-base-japanese-v2-wrime-fine-tune"
-)
-tokenizer = BertJapaneseTokenizer.from_pretrained(
-    "cl-tohoku/bert-base-japanese-whole-word-masking"
-)
-nlp = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
-
-
-# pipelineでの感情分析結果
-results = nlp("私はとっても幸せ")
-
-# readerに関する結果のみをフィルタリング
-reader_results = [result for result in results if "reader_" in result["label"]]
-values = [result["score"] for result in reader_results]
-
-# 感情の日本語訳
-emotion_translation = {
-    "surprise": "驚き",
-    "sadness": "悲しみ",
-    "fear": "恐れ",
-    "disgust": "嫌悪",
-    "anger": "怒り",
-    "anticipation": "期待",
-    "joy": "喜び",
-    "trust": "信頼",
-}
-
-# ラベルを日本語に変換
-labels = [emotion_translation[result["label"].split("_")[1]] for result in reader_results]
-
-# N-gramの設定 (ここでは2-gram)
-N = 2
-ngram_values = [np.mean(values[i : i + N]) for i in range(len(values) - N + 1)]
-ngram_labels = [f"{labels[i]}-{labels[i+1]}" for i in range(len(labels) - N + 1)]
-
-# プロット
-plt.figure(figsize=(10, 7))
-plt.bar(range(len(ngram_values)), ngram_values, color="skyblue", align="center")
-plt.xticks(range(len(ngram_values)), ngram_labels, rotation=45)
-plt.xlabel("N-gramの感情ペア")
-plt.ylabel("平均スコア")
-plt.title("Readerの感情分析 (N-gram)") -plt.tight_layout() -plt.show() diff --git a/src/emo_trial.py b/src/emo_trial.py deleted file mode 100644 index 2a1703e..0000000 --- a/src/emo_trial.py +++ /dev/null @@ -1,44 +0,0 @@ -import matplotlib.pyplot as plt -import matplotlib.animation as animation -import numpy as np -from transformers import AutoTokenizer, AutoModelForSequenceClassification -import torch - -plt.rcParams["font.family"] = "Meiryo" - -# モデルとトークナイザの準備 -tokenizer = AutoTokenizer.from_pretrained( - "Mizuiro-sakura/luke-japanese-large-sentiment-analysis-wrime" -) -model = AutoModelForSequenceClassification.from_pretrained( - "Mizuiro-sakura/luke-japanese-large-sentiment-analysis-wrime" -) - -# 感情のリスト -emotions = ["喜び", "悲しみ", "期待", "驚き", "怒り", "恐れ", "嫌悪", "信頼"] - - -def get_emotion_probs(text): - token = tokenizer( - text, return_tensors="pt", truncation=True, max_length=512, padding="max_length" - ) - output = model(**token) - normalized_logits = (output.logits - torch.min(output.logits)) / ( - torch.max(output.logits) - torch.min(output.logits) - ) - probs = normalized_logits.squeeze().tolist() - probs.append(probs[0]) # 最初の確率を最後にも追加 - return probs - - -fig = plt.figure(figsize=(4, 4)) -ax = fig.add_subplot(111, projection="polar") -ax.set_ylim(0, 1) - -theta = np.linspace(0, 2 * np.pi, len(emotions) + 1, endpoint=True) # 最後に最初の値を追加 -(l,) = ax.plot([], []) - -texts = ["すごく楽しかった。"] -data = get_emotion_probs(texts[0]) - - diff --git a/src/f.py b/src/f.py deleted file mode 100644 index c0782e4..0000000 --- a/src/f.py +++ /dev/null @@ -1,105 +0,0 @@ - -import matplotlib.pyplot as plt -import matplotlib.animation as animation -import numpy as np -import sounddevice as sd -import threading -import torch -from transformers import WhisperProcessor, WhisperForConditionalGeneration, AutoTokenizer, AutoModelForSequenceClassification -import queue - -plt.rcParams["font.family"] = "Meiryo" - -# SETTINGS -BLOCKSIZE = 24678 // 5 -SILENCE_THRESHOLD = 700 -MIN_AUDIO_LENGTH = 8000 -SILENCE_RATIO = 300 - -# Initialize Whisper model and processor -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -model_name = "vumichien/whisper-small-ja" -processor = WhisperProcessor.from_pretrained(model_name) -model = WhisperForConditionalGeneration.from_pretrained(model_name).to(device) -model = model.half() -forced_decoder_ids = processor.get_decoder_prompt_ids(language="ja", task="transcribe") - -# Initialize sentiment analysis model and tokenizer -tokenizer = AutoTokenizer.from_pretrained("Mizuiro-sakura/luke-japanese-large-sentiment-analysis-wrime") -sentiment_model = AutoModelForSequenceClassification.from_pretrained("Mizuiro-sakura/luke-japanese-large-sentiment-analysis-wrime").to(device) - -# Lists -emotions = ["喜び", "悲しみ", "期待", "驚き", "怒り", "恐れ", "嫌悪", "信頼"] - -audio_queue = queue.Queue() -global_ndarray = None - -running = True - -def get_emotion_probs(text): - token = tokenizer( - text, return_tensors="pt", truncation=True, max_length=512, padding="max_length" - ) - output = sentiment_model(**token) - normalized_logits = (output.logits - torch.min(output.logits)) / ( - torch.max(output.logits) - torch.min(output.logits) - ) - probs = normalized_logits.squeeze().tolist() - probs.append(probs[0]) # 最初の確率を最後にも追加 - return probs - -def audio_capture_thread(): - with sd.InputStream(samplerate=16000, channels=1, dtype="int16", blocksize=BLOCKSIZE) as stream: - while running: - indata, status = stream.read(BLOCKSIZE) - audio_queue.put((indata, status)) - -def update_plot(i): - global 
global_ndarray - - indata, _ = audio_queue.get_nowait() - - line.set_ydata(indata) - - indata_flattened = abs(indata.flatten()) - is_significant_audio = np.asarray(np.where(indata_flattened > SILENCE_THRESHOLD)).size >= SILENCE_RATIO - - if is_significant_audio: - if global_ndarray is not None: - global_ndarray = np.concatenate((global_ndarray, indata), dtype="int16") - else: - global_ndarray = indata - elif global_ndarray is not None: - if len(global_ndarray) < MIN_AUDIO_LENGTH: - return - indata_transformed = global_ndarray.flatten().astype(np.float32) / 32768.0 - global_ndarray = None - input_data = processor(indata_transformed, sampling_rate=16000, return_tensors="pt").input_features - input_data = input_data.half() - predicted_ids = model.generate(input_data.to(device), forced_decoder_ids=forced_decoder_ids) - transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True) - data = get_emotion_probs(transcription[0]) - radar_line.set_ydata(data) - -if __name__ == "__main__": - fig, axs = plt.subplots(2) - - # Audio waveform plot - (line,) = axs[0].plot(np.random.randn(BLOCKSIZE)) - axs[0].set_ylim([-(2**15), 2**15 - 1]) - axs[0].set_xlim(0, BLOCKSIZE) - - # Sentiment radar chart - theta = np.linspace(0, 2 * np.pi, len(emotions) + 1, endpoint=True) - (radar_line,) = axs[1].plot(theta, [0] * (len(emotions) + 1)) - axs[1].set_ylim(0, 1) - - capture_thread = threading.Thread(target=audio_capture_thread) - capture_thread.start() - - ani = animation.FuncAnimation(fig, update_plot, interval=100, blit=False) - - plt.show() - - running = False - capture_thread.join() \ No newline at end of file diff --git a/src/fast.py b/src/fast.py deleted file mode 100644 index 82495cd..0000000 --- a/src/fast.py +++ /dev/null @@ -1,110 +0,0 @@ -import sounddevice as sd -import numpy as np - -import matplotlib.pyplot as plt -import whisper - -import asyncio -import queue -import sys - - -# SETTINGS -MODEL_TYPE = "base.en" -# the model used for transcription. https://github.com/openai/whisper#available-models-and-languages -LANGUAGE = "English" -# pre-set the language to avoid autodetection -BLOCKSIZE = 24678 // 5 -# this is the base chunk size the audio is split into in samples. blocksize / 16000 = chunk length in seconds. -SILENCE_THRESHOLD = 700 -# should be set to the lowest sample amplitude that the speech in the audio material has -SILENCE_RATIO = 2000 -# number of samples in one buffer that are allowed to be higher than threshold - - -global_ndarray = None -model = whisper.load_model(MODEL_TYPE) - - -async def inputstream_generator(): - """Generator that yields blocks of input data as NumPy arrays.""" - q_in = asyncio.Queue() - loop = asyncio.get_event_loop() - - def callback(indata, frame_count, time_info, status): - print("Received audio data.") # Log when audio data is received. - loop.call_soon_threadsafe(q_in.put_nowait, (indata.copy(), status)) - - stream = sd.InputStream( - samplerate=16000, channels=1, dtype="int16", blocksize=BLOCKSIZE, callback=callback - ) - with stream: - while True: - indata, status = await q_in.get() - print( - f"Yielding {len(indata)} frames of audio data." - ) # Log the amount of audio data being yielded. 
- yield indata, status - - -plt.ion() -fig, ax = plt.subplots() -(line,) = ax.plot(np.random.randn(BLOCKSIZE)) -ax.set_ylim([-(2**15), 2**15 - 1]) -ax.set_xlim(0, BLOCKSIZE) - - -async def process_audio_buffer(): - global global_ndarray - async for indata, status in inputstream_generator(): - indata_flattened = abs(indata.flatten()) - - line.set_ydata(indata) - plt.draw() - plt.pause(0.001) - - # Log the size of non-silent data. - non_silent_size = np.asarray(np.where(indata_flattened > SILENCE_THRESHOLD)).size - print(f"Non-silent data size: {non_silent_size}") - - if non_silent_size < SILENCE_RATIO: - print("Discarding buffer due to silence.") - continue - - if global_ndarray is not None: - global_ndarray = np.concatenate((global_ndarray, indata), dtype="int16") - else: - global_ndarray = indata - - avg_end_signal = np.average((indata_flattened[-100:-1])) - if avg_end_signal > SILENCE_THRESHOLD / 15: - print("Appending buffer as the end is not silent.") - continue - else: - local_ndarray = global_ndarray.copy() - global_ndarray = None - indata_transformed = local_ndarray.flatten().astype(np.float32) / 32768.0 - result = model.transcribe(indata_transformed, language=LANGUAGE) - print(f"Transcription Result: {result['text']}") # Log the transcription result. - - del local_ndarray - del indata_flattened - - -async def main(): - print("\nActivating wire ...\n") - audio_task = asyncio.create_task(process_audio_buffer()) - while True: - await asyncio.sleep(1) - audio_task.cancel() - try: - await audio_task - except asyncio.CancelledError: - print("\nwire was cancelled") - - -if __name__ == "__main__": - try: - asyncio.run(main()) - except KeyboardInterrupt: - sys.exit("\nInterrupted by user") diff --git a/src/fast2.py b/src/fast2.py deleted file mode 100644 index fb1deff..0000000 --- a/src/fast2.py +++ /dev/null @@ -1,120 +0,0 @@ -import asyncio -import numpy as np -import sounddevice as sd -import sys -import threading -import curses -import matplotlib.pyplot as plt -import sounddevice as sd -import numpy as np - -import whisper - -import asyncio -import queue -import sys - - -# SETTINGS -MODEL_TYPE = "base.en" -# the model used for transcription. https://github.com/openai/whisper#available-models-and-languages -LANGUAGE = "English" -# pre-set the language to avoid autodetection -BLOCKSIZE = 24678 -# this is the base chunk size the audio is split into in samples. blocksize / 16000 = chunk length in seconds. 
-SILENCE_THRESHOLD = 700 -# should be set to the lowest sample amplitude that the speech in the audio material has -SILENCE_RATIO = 2000 -# number of samples in one buffer that are allowed to be higher than threshold - - -global_ndarray = None -model = whisper.load_model(MODEL_TYPE) - -data_queue = asyncio.Queue() - - -# Curses UI function -def display_ui(data_queue): - stdscr = curses.initscr() - curses.noecho() - curses.cbreak() - stdscr.keypad(True) - stdscr.nodelay(1) # non-blocking input - try: - while True: - stdscr.clear() - try: - data = data_queue.get_nowait() - except: - data = None - - if data: - stdscr.addstr(0, 0, data) - stdscr.refresh() - curses.napms(100) # Wait for 100ms - except KeyboardInterrupt: - pass - finally: - curses.endwin() - - -# Real-time plotting function -def realtime_plot(): - plt.ion() - fig, ax = plt.subplots() - (line,) = ax.plot(BLOCKSIZE) - ax.set_ylim([-(2**15), 2**15 - 1]) - ax.set_xlim(0, BLOCKSIZE) - - while True: - try: - indata = plot_queue.get() - line.set_ydata(indata) - plt.draw() - plt.pause(0.001) - except KeyboardInterrupt: - break - - -# Your original inputstream generator here... - -# Modified process_audio_buffer function -plot_queue = asyncio.Queue() - - -async def process_audio_buffer(): - global global_ndarray - async for indata, status in inputstream_generator(): - indata_flattened = abs(indata.flatten()) - non_silent_data_size = np.asarray(np.where(indata_flattened > SILENCE_THRESHOLD)).size - - message = f"Non-silent data size: {non_silent_data_size} | " - - # Append indata to the plot_queue for real-time plotting - await plot_queue.put(indata) - - if non_silent_data_size < SILENCE_RATIO: - message += "Determined as silence. Skipping buffer." - else: - message += "Determined as non-silent." - # ... (Rest of your code) - - await data_queue.put(message) - - -def main(): - ui_thread = threading.Thread(target=display_ui, args=(data_queue,)) - plot_thread = threading.Thread(target=realtime_plot) - ui_thread.start() - plot_thread.start() - - try: - asyncio.run(process_audio_buffer()) - except KeyboardInterrupt: - ui_thread.join() - plot_thread.join() - - -if __name__ == "__main__": - main() diff --git a/src/fast3.py b/src/fast3.py deleted file mode 100644 index bb07fc7..0000000 --- a/src/fast3.py +++ /dev/null @@ -1,137 +0,0 @@ -import sounddevice as sd -import numpy as np - -import matplotlib.pyplot as plt -import whisper -import threading -import asyncio -import queue -import sys -import numpy as np -import sounddevice as sd -import asyncio -import torch -from transformers import WhisperProcessor, WhisperForConditionalGeneration -import matplotlib.pyplot as plt - - -# SETTINGS -# MODEL_TYPE = "base.en" -# the model used for transcription. https://github.com/openai/whisper#available-models-and-languages -# LANGUAGE = "English" -# pre-set the language to avoid autodetection -BLOCKSIZE = 24678 // 5 -# this is the base chunk size the audio is split into in samples. blocksize / 16000 = chunk length in seconds. 
-SILENCE_THRESHOLD = 700 -MIN_AUDIO_LENGTH = 8000 -# should be set to the lowest sample amplitude that the speech in the audio material has -SILENCE_RATIO = 300 -# number of samples in one buffer that are allowed to be higher than threshold - -# Initialize model and processor -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -processor = WhisperProcessor.from_pretrained("clu-ling/whisper-large-v2-japanese-5k-steps") -model = WhisperForConditionalGeneration.from_pretrained( - "clu-ling/whisper-large-v2-japanese-5k-steps" -).to(device) -forced_decoder_ids = processor.get_decoder_prompt_ids(language="ja", task="transcribe") - -global_ndarray = None -# model = whisper.load_model(MODEL_TYPE) - - -plt.ion() -fig, ax = plt.subplots() -(line,) = ax.plot(np.random.randn(BLOCKSIZE)) -ax.set_ylim([-(2**15), 2**15 - 1]) -ax.set_xlim(0, BLOCKSIZE) - -audio_queue = queue.Queue() # Use a regular Python queue - -def audio_capture_thread(): - """Thread that captures audio and puts blocks of data into the audio queue.""" - with sd.InputStream(samplerate=16000, channels=1, dtype="int16", blocksize=BLOCKSIZE) as stream: - while True: - indata, status = stream.read(BLOCKSIZE) - audio_queue.put((indata, status)) - -async def inputstream_generator(): - """Generator that yields blocks of input data as NumPy arrays.""" - while True: - indata, status = audio_queue.get() - yield indata, status - -async def inputstream_generator(): - """Generator that yields blocks of input data as NumPy arrays.""" - q_in = asyncio.Queue() - loop = asyncio.get_event_loop() - - def callback(indata, frame_count, time_info, status): - loop.call_soon_threadsafe(q_in.put_nowait, (indata.copy(), status)) - - stream = sd.InputStream( - samplerate=16000, channels=1, dtype="int16", blocksize=BLOCKSIZE, callback=callback - ) - with stream: - while True: - indata, status = await q_in.get() - yield indata, status - - -async def process_audio_buffer(): - global global_ndarray - receiving_audio = False - - async for indata, status in inputstream_generator(): - indata_flattened = abs(indata.flatten()) - line.set_ydata(indata) - plt.draw() - plt.pause(0.001) - - # Check if current chunk has significant audio - is_significant_audio = ( - np.asarray(np.where(indata_flattened > SILENCE_THRESHOLD)).size >= SILENCE_RATIO - ) - - # If it has significant audio - if is_significant_audio: - print("Status: Receiving audio data.") - receiving_audio = True - if global_ndarray is not None: - global_ndarray = np.concatenate((global_ndarray, indata), dtype="int16") - else: - global_ndarray = indata - continue - - # If current chunk is silent and there was audio being received previously - if not is_significant_audio and receiving_audio: - print("Status: Detected silence after receiving audio.") - if len(global_ndarray) < MIN_AUDIO_LENGTH: - print( - f"Status: Audio length {len(global_ndarray)} is insufficient. Awaiting more input." 
- ) - continue - - print("Status: Processing audio data...") - indata_transformed = global_ndarray.flatten().astype(np.float32) / 32768.0 - input_features = processor( - indata_transformed, sampling_rate=16000, return_tensors="pt" - ).input_features - predicted_ids = model.generate( - input_features.to(device), forced_decoder_ids=forced_decoder_ids - ) - transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True) - print(f"Transcription: {transcription}") - global_ndarray = None - receiving_audio = False - else: - print("Status: Detected silence.") - - -if __name__ == "__main__": - thread = threading.Thread(target=audio_capture_thread, daemon=True) - thread.start() - try: - asyncio.run(process_audio_buffer()) - except KeyboardInterrupt: - print("\nInterrupted by user") diff --git a/src/fast4.py b/src/fast4.py deleted file mode 100644 index 11ea611..0000000 --- a/src/fast4.py +++ /dev/null @@ -1,133 +0,0 @@ -import sounddevice as sd -import numpy as np -import matplotlib.pyplot as plt -import threading -import torch -from transformers import WhisperProcessor, WhisperForConditionalGeneration -import queue - -# SETTINGS -BLOCKSIZE = 24678 // 5 -SILENCE_THRESHOLD = 700 -MIN_AUDIO_LENGTH = 8000 -SILENCE_RATIO = 300 - -# Initialize model and processor -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -# print(f"Using device: {device}") -# "clu-ling/whisper-large-v2-japanese-5k-steps" -model_name = "vumichien/whisper-small-ja" -# model_name = "kimbochen/whisper-tiny-ja" -# C:\Users\anosillus\.cache\huggingface\hub\models--kimbochen--whisper-tiny-ja -processor = WhisperProcessor.from_pretrained(model_name) -model = WhisperForConditionalGeneration.from_pretrained(model_name).to(device) -model = model.half() -forced_decoder_ids = processor.get_decoder_prompt_ids(language="ja", task="transcribe") - -global_ndarray = None -audio_queue = queue.Queue() - - -def audio_capture_thread(): - """Thread that captures audio and puts blocks of data into the audio queue.""" - with sd.InputStream( - samplerate=16000, channels=1, dtype="int16", blocksize=BLOCKSIZE - ) as stream: - while True: - indata, status = stream.read(BLOCKSIZE) - audio_queue.put((indata, status)) - - -def transcription_and_plotting(): - plt.ion() - fig, ax = plt.subplots() - (line,) = ax.plot(np.random.randn(BLOCKSIZE)) - ax.set_ylim([-(2**15), 2**15 - 1]) - ax.set_xlim(0, BLOCKSIZE) - - global global_ndarray - - while True: - indata, status = audio_queue.get() - indata_flattened = abs(indata.flatten()) - - line.set_ydata(indata) - plt.draw() - plt.pause(0.001) - - is_significant_audio = ( - np.asarray(np.where(indata_flattened > SILENCE_THRESHOLD)).size >= SILENCE_RATIO - ) - - if is_significant_audio: - if global_ndarray is not None: - global_ndarray = np.concatenate((global_ndarray, indata), dtype="int16") - else: - global_ndarray = indata - elif global_ndarray is not None: - if len(global_ndarray) < MIN_AUDIO_LENGTH: - continue - indata_transformed = global_ndarray.flatten().astype(np.float32) / 32768.0 - global_ndarray = None - input_data = processor( - indata_transformed, sampling_rate=16000, return_tensors="pt" - ).input_features - input_data = input_data.half() - predicted_ids = model.generate( - input_data.to(device), forced_decoder_ids=forced_decoder_ids - ) - - transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True) - print(f"Transcription: {transcription}") - - -if __name__ == "__main__": - capture_thread = threading.Thread(target=audio_capture_thread) - 
capture_thread.start() - - try: - transcription_and_plotting() - except KeyboardInterrupt: - print("\nInterrupted by user") - -""" - -# ... [上記のコードとインポート文はここに続く] - -# 1. モデルとトークナイザの初期化 -emotion_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased") -emotion_model = BertForSequenceClassification.from_pretrained("bert-base-uncased").to(device) - -def predict_emotion(text): - # 2. `predict_emotion`関数を定義 - inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512) - for key in inputs: - inputs[key] = inputs[key].to(device) - - with torch.no_grad(): - outputs = emotion_model(**inputs) - - probabilities = softmax(outputs.logits, dim=1) - class_id = torch.argmax(probabilities).item() - - return "Positive" if class_id == 1 else "Negative" - -def transcription_and_plotting(): - # ... [関数の中身の初めの部分] - - while True: - # ... [関数の中のループの初めの部分] - - if is_significant_audio: - # ... [この部分の残り] - - # 3. トランスクリプトが得られた後、そのトランスクリプトを感情分析関数に渡す - if transcription: - emotion_result = predict_emotion(transcription[0]) - - # 4. トランスクリプトと感情の結果を表示 - print(f"Transcription: {transcription}") - print(f"Emotion: {emotion_result}") - -# ... [関数の定義の後の部分] -""" diff --git a/src/fast5.py b/src/fast5.py deleted file mode 100644 index 29fda2c..0000000 --- a/src/fast5.py +++ /dev/null @@ -1,126 +0,0 @@ -import sounddevice as sd -import numpy as np -import matplotlib.pyplot as plt -import multiprocessing -import torch -from transformers import WhisperProcessor, WhisperForConditionalGeneration -from transformers import BertTokenizer, BertForSequenceClassification - -# SETTINGS -BLOCKSIZE = 24678 // 5 -SILENCE_THRESHOLD = 700 -MIN_AUDIO_LENGTH = 8000 -SILENCE_RATIO = 300 - -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - -# Initialize Whisper model and processor -model_name = "vumichien/whisper-small-ja" -processor = WhisperProcessor.from_pretrained(model_name) -model = WhisperForConditionalGeneration.from_pretrained(model_name).to(device) -model = model.half() -forced_decoder_ids = processor.get_decoder_prompt_ids(language="ja", task="transcribe") - -# Initialize BERT model -BERT_MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment" -tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME) -bert_model = BertForSequenceClassification.from_pretrained(BERT_MODEL_NAME).to(device) -bert_model = bert_model.half() - -audio_queue = multiprocessing.Queue() -classification_queue = multiprocessing.Queue() -shared_ndarray_list = None - - -def audio_capture_thread(): - try: - print("Starting audio capture thread...") - with sd.InputStream( - samplerate=16000, channels=1, dtype="int16", blocksize=BLOCKSIZE - ) as stream: - while True: - indata, status = stream.read(BLOCKSIZE) - print(f"Captured audio data: {indata[:10]}") - audio_queue.put((indata, status)) - except Exception as e: - print(f"Error in audio_capture_thread: {e}") - - -def bert_classification(): - try: - print("Starting BERT classification thread...") - while True: - transcription = classification_queue.get() - inputs = tokenizer( - transcription, - return_tensors="pt", - truncation=True, - padding=True, - max_length=256, - ).to(device) - outputs = bert_model(**inputs) - predicted_label_idx = torch.argmax(outputs.logits, dim=1).item() - labels = ["very negative", "negative", "neutral", "positive", "very positive"] - print(f"Predicted emotion: {labels[predicted_label_idx]}") - except Exception as e: - print(f"Error in bert_classification: {e}") - - -def transcription_and_plotting(): - plt.ion() - 
fig, ax = plt.subplots() - (line,) = ax.plot(np.random.randn(BLOCKSIZE)) - ax.set_ylim([-(2**15), 2**15 - 1]) - ax.set_xlim(0, BLOCKSIZE) - - global shared_ndarray_list - while True: - indata, status = audio_queue.get() - indata_flattened = abs(indata.flatten()) - line.set_ydata(indata) - plt.draw() - - is_significant_audio = ( - np.asarray(np.where(indata_flattened > SILENCE_THRESHOLD)).size >= SILENCE_RATIO - ) - if is_significant_audio: - shared_ndarray_list.extend(indata.flatten()) - elif len(shared_ndarray_list) > 0: - global_ndarray = np.array(shared_ndarray_list, dtype="int16") - if len(global_ndarray) < MIN_AUDIO_LENGTH: - continue - indata_transformed = global_ndarray.astype(np.float32) / 32768.0 - shared_ndarray_list.clear() - input_data = processor( - indata_transformed, sampling_rate=16000, return_tensors="pt" - ).input_features - input_data = input_data.half() - predicted_ids = model.generate( - input_data.to(device), forced_decoder_ids=forced_decoder_ids - ) - transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True) - print(f"Transcription: {transcription}") - classification_queue.put(transcription[0]) - - -def main(): - global shared_ndarray_list - manager = multiprocessing.Manager() - shared_ndarray_list = manager.list() - - capture_process = multiprocessing.Process(target=audio_capture_thread) - classification_process = multiprocessing.Process(target=bert_classification) - - capture_process.start() - classification_process.start() - - try: - transcription_and_plotting() - except KeyboardInterrupt: - print("\nInterrupted by user") - capture_process.terminate() - classification_process.terminate() - - -if __name__ == "__main__": - main() diff --git a/src/fast6.py b/src/fast6.py deleted file mode 100644 index 21914b6..0000000 --- a/src/fast6.py +++ /dev/null @@ -1,113 +0,0 @@ -import sounddevice as sd -import numpy as np -import matplotlib.pyplot as plt -import threading -import torch -from transformers import WhisperProcessor, WhisperForConditionalGeneration -from transformers import BertTokenizer, BertForSequenceClassification -import queue - -# SETTINGS -BLOCKSIZE = 24678 // 5 -SILENCE_THRESHOLD = 700 -MIN_AUDIO_LENGTH = 8000 -SILENCE_RATIO = 300 - -# Initialize Whisper model and processor -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -model_name = "vumichien/whisper-small-ja" -processor = WhisperProcessor.from_pretrained(model_name) -model = WhisperForConditionalGeneration.from_pretrained(model_name).to(device) -model = model.half() -forced_decoder_ids = processor.get_decoder_prompt_ids(language="ja", task="transcribe") - -# Initialize BERT model and tokenizer for sentiment analysis -BERT_MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment" -tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME) -bert_model = BertForSequenceClassification.from_pretrained(BERT_MODEL_NAME).to(device) -bert_model = bert_model.half() - -global_ndarray = None -audio_queue = queue.Queue() -classification_queue = queue.Queue() - - -def audio_capture_thread(): - with sd.InputStream( - samplerate=16000, channels=1, dtype="int16", blocksize=BLOCKSIZE - ) as stream: - while True: - indata, status = stream.read(BLOCKSIZE) - audio_queue.put((indata, status)) - - -def bert_classification(): - while True: - transcription = classification_queue.get() - inputs = tokenizer( - transcription, return_tensors="pt", truncation=True, padding=True, max_length=256 - ).to(device) - outputs = bert_model(**inputs) - predicted_label_idx = 
torch.argmax(outputs.logits, dim=1).item() - - labels = ["very negative", "negative", "neutral", "positive", "very positive"] - print(f"Predicted emotion: {labels[predicted_label_idx]}") - - -def transcription_and_plotting(): - plt.ion() - fig, ax = plt.subplots() - (line,) = ax.plot(np.random.randn(BLOCKSIZE)) - ax.set_ylim([-(2**15), 2**15 - 1]) - ax.set_xlim(0, BLOCKSIZE) - - global global_ndarray - - while True: - indata, status = audio_queue.get() - indata_flattened = abs(indata.flatten()) - - line.set_ydata(indata) - plt.draw() - plt.pause(0.001) - - is_significant_audio = ( - np.asarray(np.where(indata_flattened > SILENCE_THRESHOLD)).size >= SILENCE_RATIO - ) - - if is_significant_audio: - if global_ndarray is not None: - global_ndarray = np.concatenate((global_ndarray, indata), dtype="int16") - else: - global_ndarray = indata - elif global_ndarray is not None: - if len(global_ndarray) < MIN_AUDIO_LENGTH: - continue - indata_transformed = global_ndarray.flatten().astype(np.float32) / 32768.0 - global_ndarray = None - input_data = processor( - indata_transformed, sampling_rate=16000, return_tensors="pt" - ).input_features - input_data = input_data.half() - predicted_ids = model.generate( - input_data.to(device), forced_decoder_ids=forced_decoder_ids - ) - - transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True) - print(f"Transcription: {transcription}") - - # BERT分類スレッドに転送 - classification_queue.put(transcription[0]) - - -if __name__ == "__main__": - capture_thread = threading.Thread(target=audio_capture_thread) - classification_thread = threading.Thread(target=bert_classification) - - capture_thread.start() - classification_thread.start() - - try: - transcription_and_plotting() - except KeyboardInterrupt: - print("\nInterrupted by user") diff --git a/src/fast7.py b/src/fast7.py deleted file mode 100644 index 483278e..0000000 --- a/src/fast7.py +++ /dev/null @@ -1,148 +0,0 @@ -import sounddevice as sd -import numpy as np -import matplotlib.pyplot as plt -import threading -import torch -from transformers import WhisperProcessor, WhisperForConditionalGeneration -from transformers import BertTokenizer, BertForSequenceClassification -import queue - -# SETTINGS -BLOCKSIZE = 24678 // 5 -SILENCE_THRESHOLD = 700 -MIN_AUDIO_LENGTH = 8000 -SILENCE_RATIO = 300 - -audio_queue = queue.Queue() -classification_queue = queue.Queue() -running = True - - -def initialize_models(): - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - # Whisper model and processor - model_name = "vumichien/whisper-small-ja" - processor = WhisperProcessor.from_pretrained(model_name) - model = WhisperForConditionalGeneration.from_pretrained(model_name).to(device) - model = model.half() - forced_decoder_ids = processor.get_decoder_prompt_ids(language="ja", task="transcribe") - - # BERT model and tokenizer - BERT_MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment" - tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME) - bert_model = BertForSequenceClassification.from_pretrained(BERT_MODEL_NAME).to(device) - bert_model = bert_model.half() - - return model, processor, forced_decoder_ids, bert_model, tokenizer, device - - -def audio_capture_thread(): - global running - with sd.InputStream( - samplerate=16000, channels=1, dtype="int16", blocksize=BLOCKSIZE - ) as stream: - while running: - indata, status = stream.read(BLOCKSIZE) - audio_queue.put((indata, status)) - audio_queue.put(None) # Signal termination - - -def bert_classification(tokenizer, bert_model, 
device): - labels = ["very negative", "negative", "neutral", "positive", "very positive"] - while True: - transcription = classification_queue.get() - if transcription is None: # Check for termination signal - break - inputs = tokenizer( - transcription, return_tensors="pt", truncation=True, padding=True, max_length=256 - ).to(device) - outputs = bert_model(**inputs) - predicted_label_idx = torch.argmax(outputs.logits, dim=1).item() - - print(f"Predicted emotion: {labels[predicted_label_idx]}") - - -def process_audio_data(line, global_ndarray, model, processor, forced_decoder_ids, device): - indata, status = audio_queue.get() - if indata is None: # Check for termination signal - return None, None - - indata_flattened = abs(indata.flatten()) - - line.set_ydata(indata) - plt.draw() - plt.pause(0.001) - - is_significant_audio = ( - np.asarray(np.where(indata_flattened > SILENCE_THRESHOLD)).size >= SILENCE_RATIO - ) - - if is_significant_audio: - if global_ndarray is not None: - global_ndarray = np.concatenate((global_ndarray, indata), dtype="int16") - else: - global_ndarray = indata - elif global_ndarray is not None: - if len(global_ndarray) < MIN_AUDIO_LENGTH: - return global_ndarray, None - indata_transformed = global_ndarray.flatten().astype(np.float32) / 32768.0 - global_ndarray = None - input_data = processor( - indata_transformed, sampling_rate=16000, return_tensors="pt" - ).input_features - input_data = input_data.half() - predicted_ids = model.generate( - input_data.to(device), forced_decoder_ids=forced_decoder_ids - ) - - transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0] - print(f"Transcription: {transcription}") - - # Send to BERT classification thread - classification_queue.put(transcription) - return global_ndarray, None - - -def update_plot(line): - global running - while running: - plt.draw() - plt.pause(0.01) - - -if __name__ == "__main__": - model, processor, forced_decoder_ids, bert_model, tokenizer, device = initialize_models() - - plt.ion() - fig, ax = plt.subplots() - (line,) = ax.plot(np.random.randn(BLOCKSIZE)) - ax.set_ylim([-(2**15), 2**15 - 1]) - ax.set_xlim(0, BLOCKSIZE) - - capture_thread = threading.Thread(target=audio_capture_thread) - classification_thread = threading.Thread( - target=bert_classification, args=(tokenizer, bert_model, device) - ) - plot_thread = threading.Thread(target=update_plot, args=(line,)) - - capture_thread.start() - classification_thread.start() - plot_thread.start() - - global_ndarray = None - - try: - while running: - global_ndarray, _ = process_audio_data( - line, global_ndarray, model, processor, forced_decoder_ids, device - ) - if global_ndarray is None: - running = False - except KeyboardInterrupt: - running = False - capture_thread.join() - classification_thread.join() - plot_thread.join() # Make sure to join the plot thread as well - classification_queue.put(None) # Signal termination to the classification thread - print("\nInterrupted by user") diff --git a/src/fast8.py b/src/fast8.py deleted file mode 100644 index 90ad3b4..0000000 --- a/src/fast8.py +++ /dev/null @@ -1,125 +0,0 @@ -import sounddevice as sd -import numpy as np -import matplotlib.pyplot as plt -import threading -import torch -from transformers import WhisperProcessor, WhisperForConditionalGeneration -from transformers import BertTokenizer, BertForSequenceClassification -import queue - -# SETTINGS -BLOCKSIZE = 24678 // 5 -SILENCE_THRESHOLD = 700 -MIN_AUDIO_LENGTH = 8000 -SILENCE_RATIO = 300 - -# Initialize Whisper model and processor 
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -model_name = "vumichien/whisper-small-ja" -processor = WhisperProcessor.from_pretrained(model_name) -model = WhisperForConditionalGeneration.from_pretrained(model_name).to(device) -model = model.half() -forced_decoder_ids = processor.get_decoder_prompt_ids(language="ja", task="transcribe") - -# Initialize BERT model and tokenizer for sentiment analysis -BERT_MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment" -tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME) -bert_model = BertForSequenceClassification.from_pretrained(BERT_MODEL_NAME).to(device) -bert_model = bert_model.half() - -global_ndarray = None -audio_queue = queue.Queue() -classification_queue = queue.Queue() - -# 追加: スレッドの動作を制御するフラグ -running = True - - -def audio_capture_thread(): - with sd.InputStream( - samplerate=16000, channels=1, dtype="int16", blocksize=BLOCKSIZE - ) as stream: - while running: - indata, status = stream.read(BLOCKSIZE) - audio_queue.put((indata, status)) - - audio_queue.put(("STOP", None)) - - -def bert_classification(): - while True: - transcription = classification_queue.get() - if transcription == "STOP": - break - inputs = tokenizer( - transcription, return_tensors="pt", truncation=True, padding=True, max_length=256 - ).to(device) - outputs = bert_model(**inputs) - predicted_label_idx = torch.argmax(outputs.logits, dim=1).item() - - labels = ["very negative", "negative", "neutral", "positive", "very positive"] - print(f"Predicted emotion: {labels[predicted_label_idx]}") - - -def transcription_and_plotting(): - plt.ion() - fig, ax = plt.subplots() - (line,) = ax.plot(np.random.randn(BLOCKSIZE)) - ax.set_ylim([-(2**15), 2**15 - 1]) - ax.set_xlim(0, BLOCKSIZE) - - global global_ndarray - - while running: - indata, status = audio_queue.get() - indata_flattened = abs(indata.flatten()) - - line.set_ydata(indata) - plt.draw() - plt.pause(0.001) - - is_significant_audio = ( - np.asarray(np.where(indata_flattened > SILENCE_THRESHOLD)).size >= SILENCE_RATIO - ) - - if is_significant_audio: - if global_ndarray is not None: - global_ndarray = np.concatenate((global_ndarray, indata), dtype="int16") - else: - global_ndarray = indata - elif global_ndarray is not None: - if len(global_ndarray) < MIN_AUDIO_LENGTH: - continue - indata_transformed = global_ndarray.flatten().astype(np.float32) / 32768.0 - global_ndarray = None - input_data = processor( - indata_transformed, sampling_rate=16000, return_tensors="pt" - ).input_features - input_data = input_data.half() - predicted_ids = model.generate( - input_data.to(device), forced_decoder_ids=forced_decoder_ids - ) - - transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True) - print(f"Transcription: {transcription}") - - # BERT分類スレッドに転送 - classification_queue.put(transcription[0]) - - -if __name__ == "__main__": - capture_thread = threading.Thread(target=audio_capture_thread) - classification_thread = threading.Thread(target=bert_classification) - - capture_thread.start() - classification_thread.start() - - try: - transcription_and_plotting() - except KeyboardInterrupt: - print("\nInterrupted by user") - running = False - plt.close() - - capture_thread.join() - classification_thread.join() diff --git a/src/fast9.py b/src/fast9.py deleted file mode 100644 index 1cce2b8..0000000 --- a/src/fast9.py +++ /dev/null @@ -1,229 +0,0 @@ -import sounddevice as sd -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.patches as mpatches -import threading 
-import torch -from transformers import WhisperProcessor, WhisperForConditionalGeneration -from transformers import BertTokenizer, BertForSequenceClassification -import queue -from transformers import AutoModelForSequenceClassification, BertJapaneseTokenizer -import numpy as np -import matplotlib.pyplot as plt -from matplotlib.animation import FuncAnimation -from transformers import AutoTokenizer, AutoModelForSequenceClassification -import torch - -plt.rcParams["font.family"] = "Meiryo" -# SETTINGS -BLOCKSIZE = 24678 // 5 -SILENCE_THRESHOLD = 700 -MIN_AUDIO_LENGTH = 8000 -SILENCE_RATIO = 300 - -# Initialize Whisper model and processor -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -model_name = "vumichien/whisper-small-ja" -processor = WhisperProcessor.from_pretrained(model_name) -model = WhisperForConditionalGeneration.from_pretrained(model_name).to(device) -model = model.half() - - -forced_decoder_ids = processor.get_decoder_prompt_ids(language="ja", task="transcribe") - -BERT_MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment" -tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME) -bert_model = BertForSequenceClassification.from_pretrained(BERT_MODEL_NAME).to(device) -bert_model = bert_model.half() - -global_ndarray = None -audio_queue = queue.Queue() -classification_queue = queue.Queue() - -# 追加: スレッドの動作を制御するフラグ -running = True - - -sentiment_model = AutoModelForSequenceClassification.from_pretrained( - "patrickramos/bert-base-japanese-v2-wrime-fine-tune" -).to(device) -sentiment_model = sentiment_model.half() -sentiment_tokenizer = BertJapaneseTokenizer.from_pretrained( - "cl-tohoku/bert-base-japanese-whole-word-masking" -) - -emotion_translation = { - "surprise": "驚き", - "sadness": "悲しみ", - "fear": "恐れ", - "disgust": "嫌悪", - "anger": "怒り", - "anticipation": "期待", - "joy": "喜び", - "trust": "信頼", -} - - -def audio_capture_thread(): - with sd.InputStream( - samplerate=16000, channels=1, dtype="int16", blocksize=BLOCKSIZE - ) as stream: - while running: - indata, status = stream.read(BLOCKSIZE) - audio_queue.put((indata, status)) - - audio_queue.put(("STOP", None)) - - -plotting_queue = queue.Queue() - - -def bert_classification(): - while True: - transcription = classification_queue.get() - if transcription == "STOP": - break - - # Sentiment analysis with the provided model - results = sentiment_tokenizer( - transcription, return_tensors="pt", truncation=True, padding=True, max_length=256 - ).to(device) - outputs = sentiment_model(**results) - sentiment_results = torch.softmax(outputs.logits, dim=1).cpu().detach().numpy() - - reader_results = [ - {"label": label.item(), "score": score} - for label, score in zip( - outputs.logits.argmax(dim=1).cpu().numpy(), sentiment_results[0] - ) - if "reader_" in str(label.item()) - ] - values = [result["score"] for result in reader_results] - - # ラベルを日本語に変換 - labels = [emotion_translation[result["label"]] for result in reader_results] - - # N-gramの設定 (ここでは2-gram) - N = 2 - ngram_values = [np.mean(values[i : i + N]) for i in range(len(values) - N + 1)] - ngram_labels = [f"{labels[i]}-{labels[i+1]}" for i in range(len(labels) - N + 1)] - - # データをキューに追加 - plotting_queue.put((ngram_values, ngram_labels)) - - - -emotions = ["喜び", "悲しみ", "期待", "驚き", "怒り", "恐れ", "嫌悪", "信頼"] - -def get_emotion_probs(text): - token = tokenizer( - text, return_tensors="pt", truncation=True, max_length=512, padding="max_length" - ) - output = model(**token) - normalized_logits = (output.logits - torch.min(output.logits)) / ( - 
torch.max(output.logits) - torch.min(output.logits) - ) - probs = normalized_logits.squeeze().tolist() - probs.append(probs[0]) # 最初の確率を最後にも追加 - return probs - -def main_plotting(): - plt.ion() - fig = plt.figure(figsize=(10, 7)) - ax1 = fig.add_subplot(2, 1, 1) - ax1.set_ylim([-(2**15), 2**15 - 1]) - ax1.set_xlim(0, BLOCKSIZE) - (line,) = ax1.plot(np.zeros(BLOCKSIZE), 'g-') - - ax2 = fig.add_subplot(2, 1, 2, polar=True) - ax2.set_ylim(0, 1) - theta = np.linspace(0, 2 * np.pi, len(emotions) + 1, endpoint=True) - (l,) = ax2.plot([], []) - - index = 0 - - def update(i): - global index - # 音声データの取得と更新 - indata, _ = audio_queue.get() - if isinstance(indata, str) and indata == "STOP": - return - - line.set_ydata(indata) - - # 以下の部分は、感情分析のためのテキストデータを取得するものと仮定しています。 - # もし実際にテキストデータがキューに入れられる場合は、以下の行を有効にしてください。 - text = classification_queue.get() - - # この例では、固定のテキストリストからデータを取得します。 - text = texts[index % len(texts)] - index += 1 - - data = get_emotion_probs(text) - - ax2.clear() - ax2.set_xticks(theta) - ax2.set_xticklabels(emotions + [emotions[0]]) # ラベルも最初のものを最後に追加 - ax2.set_ylim(0, 1) - (l,) = ax2.plot(theta, data, "r-", lw=2) - - ani = FuncAnimation(fig, update, interval=1000, blit=False) - plt.show() - - -# def main_plotting(): -# plt.ion() -# fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 7)) -# -# ax1.set_ylim([-(2**15), 2**15 - 1]) -# ax1.set_xlim(0, BLOCKSIZE) -# (line,) = ax1.plot(np.zeros(BLOCKSIZE), 'g-') -# -# # レーダーチャートの初期設定 -# emotions = list(emotion_translation.values()) -# num_vars = len(emotions) -# angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist() -# ax2.set_theta_offset(np.pi / 2) -# ax2.set_theta_direction(-1) -# ax2.set_rlabel_position(115) -# ax2.set_xticks(angles) -# ax2.set_xticklabels(emotions) -# ax2.set_ylim(0, 1) -# -# while running: -# # 音声データの取得と更新 -# indata, _ = audio_queue.get() -# if isinstance(indata, str) and indata == "STOP": -# break -# line.set_ydata(indata) -# -# # 感情分析データの取得とレーダーチャートの更新 -# ngram_values, ngram_labels = plotting_queue.get() -# ax2.clear() -# ax2.set_xticks(angles) -# ax2.set_xticklabels(emotions) -# ax2.set_ylim(0, 1) -# ax2.plot(angles, ngram_values, color='b', linewidth=2, linestyle='solid') -# ax2.fill(angles, ngram_values, color='skyblue', alpha=0.4) -# -# plt.pause(0.001) -# -# plt.close() - - -if __name__ == "__main__": - capture_thread = threading.Thread(target=audio_capture_thread) - classification_thread = threading.Thread(target=bert_classification) - - capture_thread.start() - classification_thread.start() - - try: - print('start') - main_plotting() - except KeyboardInterrupt: - print("\nInterrupted by user") - running = False - - capture_thread.join() - classification_thread.join() diff --git a/dev/b.py b/src/fast_voice2word.py similarity index 100% rename from dev/b.py rename to src/fast_voice2word.py diff --git a/src/foo.py b/src/foo.py deleted file mode 100644 index 5c392da..0000000 --- a/src/foo.py +++ /dev/null @@ -1,38 +0,0 @@ -import numpy as np -import matplotlib.pyplot as plt -from matplotlib.animation import FuncAnimation - -# 感情の日本語訳 -emotion_translation = { - "surprise": "驚き", - "sadness": "悲しみ", - "fear": "恐れ", - "disgust": "嫌悪", - "anger": "怒り", - "anticipation": "期待", - "joy": "喜び", - "trust": "信頼" -} - -# readerの感情のデータ (仮のデータを設定) -labels = list(emotion_translation.values()) -values = [0.073, 0.075, 0.076, 0.041, 0.023, 0.022, 0.022, 0.020] -# データの最初の値を末尾に追加して閉じる -values.append(values[0]) - -# アニメーションの設定 -fig = plt.figure(figsize=(7, 7)) -ax = plt.subplot(111, polar=True) -ax.set_ylim(0, 0.1) 
- -theta = np.linspace(0, 2 * np.pi, len(values), endpoint=True) -line, = ax.plot(theta, values, "o-", lw=3) -ax.set_thetagrids(np.arange(0, 360, 360/len(labels)), labels) - -def animate(i): - values_shifted = np.roll(values, shift=i) - line.set_ydata(values_shifted) - return line, - -ani = FuncAnimation(fig, animate, frames=len(values)-1, repeat=True, blit=True) -plt.show() \ No newline at end of file diff --git a/dev/a.py b/src/word2emotion_and_plotting.py similarity index 88% rename from dev/a.py rename to src/word2emotion_and_plotting.py index 569302a..e98d5d7 100644 --- a/dev/a.py +++ b/src/word2emotion_and_plotting.py @@ -1,10 +1,11 @@ -import sounddevice as sd -import numpy as np +import queue +import threading + import matplotlib.pyplot as plt +import numpy as np +import sounddevice as sd import torch -import threading -from transformers import WhisperProcessor, WhisperForConditionalGeneration -import queue +from transformers import WhisperForConditionalGeneration, WhisperProcessor # SETTINGS BLOCKSIZE = 24678 // 5 @@ -29,7 +30,10 @@ def audio_capture_thread(): with sd.InputStream( - samplerate=16000, channels=1, dtype="int16", blocksize=BLOCKSIZE + samplerate=16000, + channels=1, + dtype="int16", + blocksize=BLOCKSIZE, ) as stream: while running: indata, status = stream.read(BLOCKSIZE) @@ -73,11 +77,14 @@ def transcription_and_plotting(): indata_transformed = global_ndarray.flatten().astype(np.float32) / 32768.0 global_ndarray = None input_data = processor( - indata_transformed, sampling_rate=16000, return_tensors="pt" + indata_transformed, + sampling_rate=16000, + return_tensors="pt", ).input_features input_data = input_data.half() predicted_ids = model.generate( - input_data.to(device), forced_decoder_ids=forced_decoder_ids + input_data.to(device), + forced_decoder_ids=forced_decoder_ids, ) transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
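Illustrative note: README_jp.md above says the two scripts under src/ run asynchronously and hand the analysis results to each other through file I/O, but the patch does not show that glue code. The sketch below is only a guess at how such a handoff could look; the file name (emotion_result.json), the JSON format, and the polling interval are hypothetical and are not taken from the actual scripts.

# file_io_handoff_sketch.py -- hypothetical example, not part of this commit
import json
import time
from pathlib import Path

RESULT_FILE = Path("emotion_result.json")  # hypothetical handoff file


def write_result(text: str, emotion_scores: dict) -> None:
    """Producer side (speech-to-text process): publish the latest analysis result."""
    tmp = RESULT_FILE.with_suffix(".tmp")
    tmp.write_text(
        json.dumps({"text": text, "scores": emotion_scores}, ensure_ascii=False),
        encoding="utf-8",
    )
    tmp.replace(RESULT_FILE)  # atomic rename so the reader never sees a half-written file


def poll_results(interval: float = 0.5):
    """Consumer side (plotting/nodding process): yield a result whenever the file changes."""
    last_mtime = 0.0
    while True:
        if RESULT_FILE.exists():
            mtime = RESULT_FILE.stat().st_mtime
            if mtime != last_mtime:
                last_mtime = mtime
                yield json.loads(RESULT_FILE.read_text(encoding="utf-8"))
        time.sleep(interval)


if __name__ == "__main__":
    write_result("すごく楽しかった。", {"joy": 0.9, "sadness": 0.1})
    print(next(poll_results()))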