glacy · glacy · Apr 26, 2026 · Copilot · Apr 26, 2026 · Copilot
diff --git a/.jules/bolt.md b/.jules/bolt.md
@@ -6,3 +6,6 @@
 ## 2025-05-20 - Pre-compiling Regex in Loops
 **Learning:** `re.findall(pattern, string)` recompiles (or retrieves from cache) the pattern on every call. In high-frequency functions called inside loops (like complexity estimation), this overhead adds up.
 **Action:** Always pre-compile regexes (`re.compile`) into module-level or class-level constants if they are used repeatedly, especially in tight loops or recursive functions.
-**Action:** Always pre-compile regexes (`re.compile`) into module-level or class-level constants if they are used repeatedly, especially in tight loops or recursive functions.
+**Action:** Always pre-compile regexes (`re.compile`) into module-level or class-level constants if they are used repeatedly, especially in tight loops or recursive functions.
-**Action:** Always pre-compile regexes (`re.compile`) into module-level or class-level constants if they are used repeatedly, especially in tight loops or recursive functions.
+**Action:** Always pre-compile regexes (`re.compile`) into module-level or class-level constants if they are used repeatedly, especially in tight loops or recursive functions.
+## 2025-05-20 - O(N*M) loop to O(N) dict lookup in MaterialExtractor
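+**Learning:** In `evolutia/material_extractor.py`, `MaterialExtractor.get_all_exercises` found the solution for each exercise with a nested loop over the material's solutions, which is O(N*M) for N exercises and M solutions. When scaling the number of exercises per material (e.g. from 10 to 500), extraction time jumped significantly because the work grows quadratically when M grows with N.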
+**Action:** Always replace nested matching loops (especially those matching IDs or labels) with a lookup dictionary that is built once in O(N) and then answers each lookup in O(1) on average. To preserve the original `break` behavior (first match wins), populate the dictionary using `if key not in dict: dict[key] = value`, so later duplicates never overwrite the first entry.
diff --git a/evolutia/material_extractor.py b/evolutia/material_extractor.py
@@ -6,28 +6,28 @@
 from typing import Dict, List, Optional, Union
 import logging
 import time
-
-try:
-    from utils.markdown_parser import (
-        read_markdown_file,
-        extract_frontmatter,
-        extract_exercise_blocks,
-        extract_solution_blocks,
-        resolve_include_path
-    )
-except ImportError:
-    from .utils.markdown_parser import (
-        read_markdown_file,
-        extract_frontmatter,
-        extract_exercise_blocks,
-        extract_solution_blocks,
-        resolve_include_path
-    )
-
-
-logger = logging.getLogger(__name__)
-
-
+
+try:
+    from utils.markdown_parser import (
+        read_markdown_file,
+        extract_frontmatter,
+        extract_exercise_blocks,
+        extract_solution_blocks,
+        resolve_include_path
+    )
+except ImportError:
+    from .utils.markdown_parser import (
+        read_markdown_file,
+        extract_frontmatter,
+        extract_exercise_blocks,
+        extract_solution_blocks,
+        resolve_include_path
+    )
+
+
+logger = logging.getLogger(__name__)
+
+
 class MaterialExtractor:
     """Extrae ejercicios y soluciones de materiales didácticos."""
 
@@ -47,7 +47,7 @@ def __init__(self, base_path: Union[Path, str]):
         self._last_scan_timestamp: float = 0
         # TTL del caché en segundos (5 minutos)
         self._cache_ttl = 300
-    
+
     def extract_from_file(self, file_path: Path, use_cache: bool = True) -> Dict:
         """
         Extrae ejercicios y soluciones de un archivo Markdown.
@@ -65,19 +65,19 @@ def extract_from_file(self, file_path: Path, use_cache: bool = True) -> Dict:
             return self._file_cache[file_path]['data']
 
         try:
-            content = read_markdown_file(file_path)
-            frontmatter, content_body = extract_frontmatter(content)
-            
-            exercises = extract_exercise_blocks(content_body)
-            solutions = extract_solution_blocks(content_body)
-            
-            # Resolver includes de ejercicios
-            for exercise in exercises:
-                if exercise['include_path']:
-                    include_path = resolve_include_path(
-                        exercise['include_path'],
-                        file_path.parent
-                    )
+            content = read_markdown_file(file_path)
+            frontmatter, content_body = extract_frontmatter(content)
+
+            exercises = extract_exercise_blocks(content_body)
+            solutions = extract_solution_blocks(content_body)
+
+            # Resolver includes de ejercicios
+            for exercise in exercises:
+                if exercise['include_path']:
+                    include_path = resolve_include_path(
+                        exercise['include_path'],
+                        file_path.parent
+                    )
                     if include_path.exists():
                         exercise['resolved_content'] = read_markdown_file(include_path)
                     else:
@@ -103,7 +103,7 @@ def extract_from_file(self, file_path: Path, use_cache: bool = True) -> Dict:
                     solution['resolved_content'] = '\n\n---\n\n'.join(resolved_content_parts)
                 else:
                     solution['resolved_content'] = solution['content']
-            
+
             return {
                 'file_path': file_path,
                 'frontmatter': frontmatter,
@@ -153,118 +153,121 @@ def extract_from_directory(self, directory: Path, pattern: str = "*.md") -> List
         directory = Path(directory)
         if not directory.exists():
             logger.warning(f"[MaterialExtractor] Directorio no existe: {directory}")
-            return []
-
-        materials = []
-        for md_file in directory.rglob(pattern):
-            # Ignorar archivos en _build y otros directorios temporales
-            if '_build' in md_file.parts or 'node_modules' in md_file.parts:
-                continue
-
-            material = self.extract_from_file(md_file)
-            # Incluirlos si tienen ejercicios/soluciones O si parecen ser materiales de lectura/teoría
-            if material['exercises'] or material['solutions'] or 'lectura' in md_file.name.lower() or 'teoria' in md_file.name.lower():
-                materials.append(material)
-
-        return materials
-
-    def extract_by_topic(self, topic: str) -> List[Dict]:
-        """
-        Extrae materiales de un tema específico.
-
-        Busca en:
-        - {topic}/semana*_practica.md
-        - {topic}/semana*_lectura.md
-        - tareas/tarea*/tarea*.md
-
-        Args:
-            topic: Nombre del tema (ej: "analisis_vectorial")
-
-        Returns:
-            Lista de materiales extraídos
-        """
-        materials = []
-
-        # Buscar en directorio del tema
-        topic_dir = self.base_path / topic
-        if topic_dir.exists():
-            # Buscar prácticas
-            practice_files = list(topic_dir.glob("*practica*.md"))
-            for file in practice_files:
-                materials.append(self.extract_from_file(file))
-
-            # Buscar lecturas (pueden tener ejercicios)
-            reading_files = list(topic_dir.glob("*lectura*.md"))
-            for file in reading_files:
-                materials.append(self.extract_from_file(file))
-
-        # Buscar en tareas (pueden ser de múltiples temas)
-        tareas_dir = self.base_path / "tareas"
-        if tareas_dir.exists():
-            for tarea_dir in tareas_dir.iterdir():
-                if tarea_dir.is_dir():
-                    tarea_file = tarea_dir / f"{tarea_dir.name}.md"
-                    if tarea_file.exists():
-                        material = self.extract_from_file(tarea_file)
-                        # Filtrar por tema si es relevante (checking subject or tags)
-                        subject_match = material['frontmatter'].get('subject', '').lower().find(topic.lower()) != -1
-                        tags_match = any(topic.lower() in tag.lower() for tag in material['frontmatter'].get('tags', []))
-                        if subject_match or tags_match:
-                            materials.append(material)
-
-        # Buscar en examenes (pueden ser de múltiples temas) 
-        examenes_dir = self.base_path / "examenes"
-        if examenes_dir.exists():
-            for examen_dir in examenes_dir.iterdir():
-                if examen_dir.is_dir():
-                    examen_file = examen_dir / f"{examen_dir.name}.md"
-                    if examen_file.exists():
-                        material = self.extract_from_file(examen_file)
-                        # Filtrar por tema si es relevante
-                        subject_match = material['frontmatter'].get('subject', '').lower().find(topic.lower()) != -1
-                        tags_match = any(topic.lower() in tag.lower() for tag in material['frontmatter'].get('tags', []))
-
-                        # Si es examen, a veces no tiene subject especifico o tiene "Examen X".
-                        # Si no hay match explícito, tal vez incluirlo si no se encontraron otros materiales?
-                        # Para seguridad, requerimos algún match en subject, tags o keywords
-                        keywords_match = any(topic.lower() in kw.lower() for kw in material['frontmatter'].get('keywords', []))
-
-                        if subject_match or tags_match or keywords_match:
-                            materials.append(material)
-
-        return materials
-
-    def get_all_exercises(self, materials: List[Dict]) -> List[Dict]:
-        """
-        Obtiene todos los ejercicios de una lista de materiales.
-
-        Args:
-            materials: Lista de materiales extraídos
-
-        Returns:
-            Lista de ejercicios con sus metadatos
-        """
-        all_exercises = []
-
-        for material in materials:
-            for exercise in material['exercises']:
-                # Buscar solución correspondiente
-                solution = None
-                for sol in material['solutions']:
-                    if sol['exercise_label'] == exercise['label']:
-                        solution = sol
-                        break
-
-                exercise_data = {
-                    'label': exercise['label'],
-                    'content': exercise['resolved_content'],
-                    'source_file': material['file_path'],
-                    'frontmatter': material['frontmatter'],
-                    'solution': solution['resolved_content'] if solution else None,
-                    'solution_label': solution['label'] if solution else None
-                }
-                all_exercises.append(exercise_data)
-
+            return []
+
+        materials = []
+        for md_file in directory.rglob(pattern):
+            # Ignorar archivos en _build y otros directorios temporales
+            if '_build' in md_file.parts or 'node_modules' in md_file.parts:
+                continue
+
+            material = self.extract_from_file(md_file)
+            # Incluirlos si tienen ejercicios/soluciones O si parecen ser materiales de lectura/teoría
+            if material['exercises'] or material['solutions'] or 'lectura' in md_file.name.lower() or 'teoria' in md_file.name.lower():
+                materials.append(material)
+
+        return materials
+
+    def extract_by_topic(self, topic: str) -> List[Dict]:
+        """
+        Extrae materiales de un tema específico.
+
+        Busca en:
+        - {topic}/semana*_practica.md
+        - {topic}/semana*_lectura.md
+        - tareas/tarea*/tarea*.md
+
+        Args:
+            topic: Nombre del tema (ej: "analisis_vectorial")
+
+        Returns:
+            Lista de materiales extraídos
+        """
+        materials = []
+
+        # Buscar en directorio del tema
+        topic_dir = self.base_path / topic
+        if topic_dir.exists():
+            # Buscar prácticas
+            practice_files = list(topic_dir.glob("*practica*.md"))
+            for file in practice_files:
+                materials.append(self.extract_from_file(file))
+
+            # Buscar lecturas (pueden tener ejercicios)
+            reading_files = list(topic_dir.glob("*lectura*.md"))
+            for file in reading_files:
+                materials.append(self.extract_from_file(file))
+
+        # Buscar en tareas (pueden ser de múltiples temas)
+        tareas_dir = self.base_path / "tareas"
+        if tareas_dir.exists():
+            for tarea_dir in tareas_dir.iterdir():
+                if tarea_dir.is_dir():
+                    tarea_file = tarea_dir / f"{tarea_dir.name}.md"
+                    if tarea_file.exists():
+                        material = self.extract_from_file(tarea_file)
+                        # Filtrar por tema si es relevante (checking subject or tags)
+                        subject_match = material['frontmatter'].get('subject', '').lower().find(topic.lower()) != -1
+                        tags_match = any(topic.lower() in tag.lower() for tag in material['frontmatter'].get('tags', []))
+                        if subject_match or tags_match:
+                            materials.append(material)
+
+        # Buscar en examenes (pueden ser de múltiples temas)
+        examenes_dir = self.base_path / "examenes"
+        if examenes_dir.exists():
+            for examen_dir in examenes_dir.iterdir():
+                if examen_dir.is_dir():
+                    examen_file = examen_dir / f"{examen_dir.name}.md"
+                    if examen_file.exists():
+                        material = self.extract_from_file(examen_file)
+                        # Filtrar por tema si es relevante
+                        subject_match = material['frontmatter'].get('subject', '').lower().find(topic.lower()) != -1
+                        tags_match = any(topic.lower() in tag.lower() for tag in material['frontmatter'].get('tags', []))
+
+                        # Si es examen, a veces no tiene subject especifico o tiene "Examen X".
+                        # Si no hay match explícito, tal vez incluirlo si no se encontraron otros materiales?
+                        # Para seguridad, requerimos algún match en subject, tags o keywords
+                        keywords_match = any(topic.lower() in kw.lower() for kw in material['frontmatter'].get('keywords', []))
+
+                        if subject_match or tags_match or keywords_match:
+                            materials.append(material)
+
+        return materials
+
+    def get_all_exercises(self, materials: List[Dict]) -> List[Dict]:
+        """
+        Obtiene todos los ejercicios de una lista de materiales.
+
+        Args:
+            materials: Lista de materiales extraídos
+
+        Returns:
+            Lista de ejercicios con sus metadatos
+        """
+        all_exercises = []
+
+        for material in materials:
+            # OPTIMIZATION: Pre-compute solutions dictionary for O(1) lookup
+            # instead of O(N*M) nested loops to significantly improve performance.
-            # OPTIMIZATION: Pre-compute solutions dictionary for O(1) lookup
-            # instead of O(N*M) nested loops to significantly improve performance.
+            # OPTIMIZACIÓN: Precalcular el diccionario de soluciones para búsqueda O(1)
+            # en lugar de usar bucles anidados O(N*M), mejorando significativamente el rendimiento.
-            # OPTIMIZATION: Pre-compute solutions dictionary for O(1) lookup
-            # instead of O(N*M) nested loops to significantly improve performance.
+            # OPTIMIZACIÓN: Precalcular el diccionario de soluciones para búsqueda O(1)
+            # en lugar de usar bucles anidados O(N*M), mejorando significativamente el rendimiento.
+            solutions_by_ex = {}
+            for sol in material['solutions']:
+                if sol['exercise_label'] not in solutions_by_ex:
+                    solutions_by_ex[sol['exercise_label']] = sol
+
+            for exercise in material['exercises']:
+                # Buscar solución correspondiente usando búsqueda O(1)
+                solution = solutions_by_ex.get(exercise['label'])
+
+                exercise_data = {
+                    'label': exercise['label'],
+                    'content': exercise['resolved_content'],
+                    'source_file': material['file_path'],
+                    'frontmatter': material['frontmatter'],
+                    'solution': solution['resolved_content'] if solution else None,
+                    'solution_label': solution['label'] if solution else None
+                }
+                all_exercises.append(exercise_data)
+
         return all_exercises
 
     def clear_cache(self):