diff --git a/package-lock.json b/package-lock.json index 812bcfb..631a86b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -429,6 +429,7 @@ "resolved": "https://registry.npmjs.org/@langchain/core/-/core-1.1.8.tgz", "integrity": "sha512-kIUidOgc0ZdyXo4Ahn9Zas+OayqOfk4ZoKPi7XaDipNSWSApc2+QK5BVcjvwtzxstsNOrmXJiJWEN6WPF/MvAw==", "license": "MIT", + "peer": true, "dependencies": { "@cfworker/json-schema": "^4.0.2", "ansi-styles": "^5.0.0", @@ -582,29 +583,6 @@ } } }, - "node_modules/@langchain/openai/node_modules/ws": { - "version": "8.18.3", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz", - "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==", - "license": "MIT", - "optional": true, - "peer": true, - "engines": { - "node": ">=10.0.0" - }, - "peerDependencies": { - "bufferutil": "^4.0.1", - "utf-8-validate": ">=5.0.2" - }, - "peerDependenciesMeta": { - "bufferutil": { - "optional": true - }, - "utf-8-validate": { - "optional": true - } - } - }, "node_modules/@malept/cross-spawn-promise": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/@malept/cross-spawn-promise/-/cross-spawn-promise-1.1.1.tgz", @@ -945,6 +923,7 @@ "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", @@ -1146,7 +1125,6 @@ "integrity": "sha512-+25nxyyznAXF7Nef3y0EbBeqmGZgeN/BxHX29Rs39djAfaFalmQ89SE6CWyDCHzGL0yt/ycBtNOmGTW0FyGWNw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "archiver-utils": "^2.1.0", "async": "^3.2.4", @@ -1166,7 +1144,6 @@ "integrity": "sha512-bEL/yUb/fNNiNTuUz979Z0Yg5L+LzLxGJz8x79lYmR54fmTIb6ob/hNQgkQnIUDWIFjZVQwl9Xs356I6BAMHfw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "glob": "^7.1.4", "graceful-fs": "^4.2.0", @@ -1189,7 +1166,6 @@ "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "core-util-is": "~1.0.0", "inherits": "~2.0.3", @@ -1205,8 +1181,7 @@ "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/archiver-utils/node_modules/string_decoder": { "version": "1.1.1", @@ -1214,7 +1189,6 @@ "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "safe-buffer": "~5.1.0" } @@ -1478,7 +1452,6 @@ "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "buffer": "^5.5.0", "inherits": "^2.0.4", @@ -1998,7 +1971,6 @@ "integrity": "sha512-D3uMHtGc/fcO1Gt1/L7i1e33VOvD4A9hfQLP+6ewd+BvG/gQ84Yh4oftEhAdjSMgBgwGL+jsppT7JYNpo6MHHg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "buffer-crc32": "^0.2.13", "crc32-stream": "^4.0.2", @@ -2211,7 +2183,6 @@ "integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "crc32": "bin/crc32.njs" }, @@ -2225,7 +2196,6 @@ "integrity": "sha512-NT7w2JVU7DFroFdYkeq8cywxrgjPHWkdX1wjpRQXPX5Asews3tA+Ght6lddQO5Mkumffp3X7GEqku3epj2toIw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "crc-32": "^1.2.0", "readable-stream": "^3.4.0" @@ -2511,6 +2481,7 @@ "integrity": "sha512-rcJUkMfnJpfCboZoOOPf4L29TRtEieHNOeAbYPWPxlaBw/Z1RKrRA86dOI9rwaI4tQSc/RD82zTNHprfUHXsoQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "app-builder-lib": "24.13.3", "builder-util": "24.13.1", @@ -2720,7 +2691,6 @@ "integrity": "sha512-oHkV0iogWfyK+ah9ZIvMDpei1m9ZRpdXcvde1wTpra2U8AFDNNpqJdnin5z+PM1GbQ5BoaKCWas2HSjtR0HwMg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "app-builder-lib": "24.13.3", "archiver": "^5.3.1", @@ -2734,7 +2704,6 @@ "integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "graceful-fs": "^4.2.0", "jsonfile": "^6.0.1", @@ -2750,7 +2719,6 @@ "integrity": "sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "universalify": "^2.0.0" }, @@ -2764,7 +2732,6 @@ "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">= 10.0.0" } @@ -3531,8 +3498,7 @@ "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/fs-extra": { "version": "8.1.0", @@ -4739,8 +4705,7 @@ "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/isbinaryfile": { "version": "5.0.6", @@ -5115,7 +5080,6 @@ "integrity": "sha512-b94GiNHQNy6JNTrt5w6zNyffMrNkXZb3KTkCZJb2V1xaEGCk093vkZ2jk3tpaeP33/OiXC+WvK9AxUebnf5nbw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "readable-stream": "^2.0.5" }, @@ -5129,7 +5093,6 @@ "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "core-util-is": "~1.0.0", "inherits": "~2.0.3", @@ -5145,8 +5108,7 @@ "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/lazystream/node_modules/string_decoder": { "version": "1.1.1", @@ -5154,7 +5116,6 @@ "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "safe-buffer": "~5.1.0" } @@ -5182,24 +5143,21 @@ "resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz", "integrity": "sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/lodash.difference": { "version": "4.5.0", "resolved": "https://registry.npmjs.org/lodash.difference/-/lodash.difference-4.5.0.tgz", "integrity": "sha512-dS2j+W26TQ7taQBGN8Lbbq04ssV3emRw4NY58WErlTO29pIqS0HmoT5aJ9+TUQ1N3G+JOZSji4eugsWwGp9yPA==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/lodash.flatten": { "version": "4.4.0", "resolved": "https://registry.npmjs.org/lodash.flatten/-/lodash.flatten-4.4.0.tgz", "integrity": "sha512-C5N2Z3DgnnKr0LOpv/hKCgKdb7ZZwafIrsesve6lmzvZIRZRGaZ/l6Q8+2W7NaT+ZwO3fFlSCzCzrDCFdJfZ4g==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/lodash.includes": { "version": "4.3.0", @@ -5248,8 +5206,7 @@ "resolved": "https://registry.npmjs.org/lodash.union/-/lodash.union-4.6.0.tgz", "integrity": "sha512-c4pB2CdGrGdjMKYLA+XiRDO7Y0PRQbm/Gzg8qMj+QH+pFVAoTp5sBpO0odL3FjoPCGjK96p6qsP+yQoiLoOBcw==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/lowercase-keys": { "version": "2.0.0", @@ -5605,7 +5562,6 @@ "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=0.10.0" } @@ -5923,8 +5879,7 @@ "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/processenv": { "version": "1.1.0", @@ -6099,7 +6054,6 @@ "integrity": "sha512-v05I2k7xN8zXvPD9N+z/uhXPaj0sUFCe2rcWZIpBsqxfP7xXFQ0tipAd/wjj1YxWyWtUS5IDJpOG82JKt2EAVA==", "dev": true, "license": "Apache-2.0", - "peer": true, "dependencies": { "minimatch": "^5.1.0" } @@ -7037,7 +6991,6 @@ "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "bl": "^4.0.3", "end-of-stream": "^1.4.1", @@ -7679,7 +7632,6 @@ "integrity": "sha512-9qv4rlDiopXg4E69k+vMHjNN63YFMe9sZMrdlvKnCjlCRWeCBswPPMPUfx+ipsAWq1LXHe70RcbaHdJJpS6hyQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "archiver-utils": "^3.0.4", "compress-commons": "^4.1.2", @@ -7695,7 +7647,6 @@ "integrity": "sha512-KVgf4XQVrTjhyWmx6cte4RxonPLR9onExufI1jhvw/MQ4BB6IsZD5gT8Lq+u/+pRkWna/6JoHpiQioaqFP5Rzw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "glob": "^7.2.3", "graceful-fs": "^4.2.0", @@ -7717,6 +7668,7 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-4.1.13.tgz", "integrity": "sha512-AvvthqfqrAhNH9dnfmrfKzX5upOdjUVJYFqNSlkmGf64gRaTzlPwz99IHYnVs28qYAybvAlBV+H7pn0saFY4Ig==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/run_local_analysis.py b/run_local_analysis.py index 40d3be3..7338338 100644 --- a/run_local_analysis.py +++ b/run_local_analysis.py @@ -27,6 +27,7 @@ from AssemblyAIv2.analyzers.preposition_analyzer import PrepositionAnalyzer from AssemblyAIv2.analyzers.learner_error_analyzer import LearnerErrorAnalyzer from AssemblyAIv2.analyzers.lexical_engine import LexicalEngine +from AssemblyAIv2.analyzers.fluency_analyzer import FluencyAnalyzer def run_tiered_analysis( student_name: str, @@ -130,6 +131,9 @@ def run_tiered_analysis( except: pass # 5. Final Context Construction + fluency_analyzer = FluencyAnalyzer() + student_words = [w for t in main_analyzer.student_turns_list for w in t.get('words', [])] + analysis_context = { "caf_metrics": cast(Dict[str, Any], basic_metrics).get('student_metrics', {}).get('caf_metrics') or "DATA_MISSING", "student_metrics": cast(Dict[str, Any], basic_metrics).get('student_metrics', {}), @@ -138,7 +142,11 @@ def run_tiered_analysis( "register_analysis": {"scores": AmalgumAnalyzer().analyze_register(student_text), "classification": AmalgumAnalyzer().get_genre_classification(student_text)}, "detected_errors": detected_errors, "pos_summary": pos_ratios, - "lexical_analysis": LexicalEngine().analyze_production([w for t in main_analyzer.student_turns_list for w in t.get('words', [])]) + "lexical_analysis": LexicalEngine().analyze_production(student_words), + "fluency_analysis": { + "hesitation": fluency_analyzer.analyze_hesitation(student_words), + "articulation_rate": fluency_analyzer.calculate_articulation_rate(student_words) + } } logger.info("✅ Tiered Analysis Complete") diff --git a/upload_audio_aai.py b/upload_audio_aai.py index 7028387..eadcf8c 100644 --- a/upload_audio_aai.py +++ b/upload_audio_aai.py @@ -271,20 +271,30 @@ async def perform_batch_diarization(audio_path: str, student_name: str) -> Mappi try: # 1. UPLOAD ONCE logger.info(" 🔹 Uploading audio file...") - upload_url = await asyncio.to_thread(transcriber.upload_file, audio_path) - + try: + upload_url = await asyncio.to_thread(transcriber.upload_file, audio_path) + except Exception as e: + logger.critical(f"💥 FATAL: Audio upload failed: {e}") + return None + # 2. SUBMIT BOTH JOBS logger.info(" 🔹 Submitting Dual-Pass Transcriptions...") - t_diar_task = asyncio.to_thread(transcriber.transcribe, upload_url, config_diar) - t_raw_task = asyncio.to_thread(transcriber.transcribe, upload_url, config_raw) - - t_diar, t_raw = await asyncio.gather(t_diar_task, t_raw_task) - + try: + t_diar_task = asyncio.to_thread(transcriber.transcribe, upload_url, config_diar) + t_raw_task = asyncio.to_thread(transcriber.transcribe, upload_url, config_raw) + t_diar, t_raw = await asyncio.gather(t_diar_task, t_raw_task) + except Exception as e: + logger.critical(f"💥 FATAL: Transcription job submission failed: {e}") + return None + + # --- VALIDATION --- if t_diar.status == aai.TranscriptStatus.error: - logger.error(f"❌ Pass 1 Failed: {t_diar.error}") + logger.error(f"❌ Pass 1 (Diarization) Failed: {t_diar.error}") + # Even if it fails, we might proceed with raw if that's better than nothing. + # For now, we'll fail hard. return None if t_raw.status == aai.TranscriptStatus.error: - logger.error(f"❌ Pass 2 Failed: {t_raw.error}") + logger.error(f"❌ Pass 2 (Raw Content) Failed: {t_raw.error}") return None logger.info(" ✅ Both passes complete. Merging...") @@ -316,41 +326,49 @@ async def perform_batch_diarization(audio_path: str, student_name: str) -> Mappi # Iterate Raw Words raw_words = t_raw.words if t_raw.words else [] # Use the top-level words list from Raw transcript + # [REVISED] More robust speaker mapping logic # Helper to find speaker for a given time range - # Simple optimization: keep track of last index diar_idx = 0 - - for w in raw_words: - w_start = w.start - w_end = w.end + for i, w in enumerate(raw_words): + w_start, w_end = w.start, w.end found_speaker: str | None = None - - # Look ahead in diar_map - # We look for ANY overlap. + + # Search for an overlapping speaker tag temp_idx = diar_idx while temp_idx < len(diar_map): d = diar_map[temp_idx] - d_end = float(d['end']) - d_start = float(d['start']) - + d_start, d_end = float(d['start']), float(d['end']) + + # Advance if diarization is behind if d_end < w_start: - # Diarized word ended before this raw word started. Move pointer. - diar_idx = temp_idx # Safe to advance + diar_idx = temp_idx temp_idx += 1 continue + + # Break if diarization is too far ahead if d_start > w_end: - # Diarized word starts after this raw word ends. No overlap possible anymore. break - # Overlap found! + # Overlap found found_speaker = str(d['speaker']) - break # Take first overlap + break + # --- Fallback Logic --- if not found_speaker: - # Fallback: Inherit previous speaker or Unknown - found_speaker = current_speaker if current_speaker != "Unknown" else "A" - - # Start new turn if speaker changed + # If no speaker found, inherit from the previous word's speaker. + # This handles cases where diarization might miss a word. + if i > 0 and 'speaker' in raw_words[i-1]: + found_speaker = raw_words[i-1]['speaker'] + logger.debug(f" Word '{w.text}' inherited speaker '{found_speaker}'") + else: + # For the very first word or unusual cases, default to "A" + found_speaker = "A" + logger.warning(f" Word '{w.text}' defaulted to speaker 'A'") + + # Attach speaker to the word object itself for robust turn construction + w.speaker = found_speaker + + # --- Turn Construction --- if found_speaker != current_speaker: if current_words: # Flush previous turn @@ -662,24 +680,39 @@ async def process_and_upload(audio_path: str, student_name: str, notes: str = "" } try: + # --- Incomplete-Proof Downloads --- + # We now explicitly check for the presence of data and log if it's missing, + # ensuring that a file is always created, even if empty. + # 1. _words.json (Authoritative) + words_data = params.get('turns', []) + if not words_data: + logger.warning(" ⚠️ No 'turns' data found for _words.json. File will be empty.") with open(capture_dir / f"{base_filename}_words.json", "w") as f: - json.dump(params['turns'], f, indent=2) # Saving full turns structure which contains words + json.dump(words_data, f, indent=2) # 2. _sentences.json + sentences_data = params.get('sentences', []) + if not sentences_data: + logger.warning(" ⚠️ No 'sentences' data found for _sentences.json. File will be empty.") with open(capture_dir / f"{base_filename}_sentences.json", "w") as f: - json.dump(params['sentences'], f, indent=2) + json.dump(sentences_data, f, indent=2) # 3. _raw.txt + raw_text_data = params.get('transcriptText', "") + if not raw_text_data: + logger.warning(" ⚠️ No 'transcriptText' data found for _raw.txt. File will be empty.") with open(capture_dir / f"{base_filename}_raw.txt", "w") as f: - f.write(params['transcriptText']) + f.write(raw_text_data) # 4. _diarized.txt + diarized_text_data = params.get('punctuatedTranscript', "") + if not diarized_text_data: + logger.warning(" ⚠️ No 'punctuatedTranscript' data found for _diarized.txt. File will be empty.") with open(capture_dir / f"{base_filename}_diarized.txt", "w") as f: - f.write(params['punctuatedTranscript']) + f.write(diarized_text_data) # 5. _petty_analysis.json (Local Analysis Metrics) - # Renamed to 'petty_analysis' per user instruction for clarity with open(capture_dir / f"{base_filename}_petty_analysis.json", "w") as f: json.dump(analysis_context, f, indent=2) diff --git a/viewer2.html b/viewer2.html index 8cfffe9..9c60b0f 100644 --- a/viewer2.html +++ b/viewer2.html @@ -142,265 +142,300 @@ }; }, []); + + +
+ + + + +