feat: Add comprehensive Python testing infrastructure with Poetry #26

Open · wants to merge 1 commit into master
99 changes: 99 additions & 0 deletions .gitignore
@@ -0,0 +1,99 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Virtual environments
venv/
ENV/
env/
.venv/
.env

# IDEs
.idea/
.vscode/
*.swp
*.swo
*~
.project
.pydevproject
.settings/
.DS_Store

# Claude
.claude/*

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Project specific
*.h5
*.pik
*.log
*.out

# Model checkpoints
checkpoints/
saved_models/
model_weights/

# Data files (keep the ones in data/ directory)
!/data/
25 changes: 25 additions & 0 deletions CLAUDE.md
@@ -0,0 +1,25 @@
# Project Setup

This file contains project-specific information for Claude.

## Testing Commands
- Run tests: `poetry run test` or `poetry run tests`
- Run with coverage: `poetry run pytest --cov`
- Run unit tests only: `poetry run pytest tests/unit`
- Run integration tests only: `poetry run pytest tests/integration`
- Run with verbose output: `poetry run pytest -v`
- Run specific test file: `poetry run pytest tests/test_file.py`
- Run tests with specific marker: `poetry run pytest -m unit`

Note: The coverage threshold (`--cov-fail-under`) is currently set to 0% for the initial setup. Raise it to 80% in pyproject.toml when the suite is ready.

## Code Quality Commands
- Linting: TBD
- Type checking: TBD

## Project Structure
- Main source code: model/ and root .py files
- Tests: tests/
- Documentation: docs/
- Testing config: pyproject.toml

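The Testing Commands above rely on the markers registered in pyproject.toml. As an illustration only (not part of this PR's diff), a marker-selected test could look like the sketch below; the module path tests/unit/test_markers_example.py and the test names are hypothetical:

import pytest


@pytest.mark.unit
def test_addition_is_fast():
    # Selected by `poetry run pytest -m unit`.
    assert 2 + 2 == 4


@pytest.mark.slow
def test_something_expensive():
    # Deselected by `poetry run pytest -m "not slow"`.
    assert sum(range(1_000_000)) > 0
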
282 changes: 282 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

74 changes: 74 additions & 0 deletions pyproject.toml
@@ -0,0 +1,74 @@
[tool.poetry]
name = "bert-nlp-project"
version = "0.1.0"
description = "BERT-based NLP model implementation"
authors = ["Your Name <[email protected]>"]
readme = "README.md"
packages = [{include = "model"}, {include = "*.py"}]

[tool.poetry.dependencies]
python = "^3.8"

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.3"
pytest-cov = "^4.1.0"
pytest-mock = "^3.12.0"

[tool.poetry.scripts]
test = "pytest:main"
tests = "pytest:main"

[tool.pytest.ini_options]
minversion = "7.0"
addopts = [
"-ra",
"--strict-markers",
"--cov=.",
"--cov-branch",
"--cov-report=term-missing:skip-covered",
"--cov-report=html",
"--cov-report=xml",
"--cov-fail-under=0",
]
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py", "testing/python/*.py"]
markers = [
"unit: Unit tests",
"integration: Integration tests",
"slow: Tests that take a long time to run",
]

[tool.coverage.run]
source = ["."]
omit = [
"*/tests/*",
"*/test_*",
"*_test.py",
"setup.py",
"*/venv/*",
"*/.venv/*",
"*/virtualenv/*",
"*/.tox/*",
"*/.pytest_cache/*",
"*/__pycache__/*",
"*/site-packages/*",
]

[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"def __repr__",
"if self.debug:",
"if settings.DEBUG",
"raise AssertionError",
"raise NotImplementedError",
"if 0:",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
"class .*\\bProtocol\\):",
"@(abc\\.)?abstractmethod",
]

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
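The [tool.poetry.scripts] entries map `poetry run test` and `poetry run tests` to pytest's main entry point. Roughly, the generated console script behaves like the sketch below (the file name run_tests.py is hypothetical; pytest.main() picks up addopts and testpaths from pyproject.toml, so coverage reporting is included):

import sys

import pytest

if __name__ == "__main__":
    # Equivalent in spirit to `poetry run test`: run the configured suite
    # and propagate pytest's exit code to the shell.
    sys.exit(pytest.main())
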
Empty file added tests/__init__.py
Empty file.
114 changes: 114 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,114 @@
import pytest
import tempfile
import shutil
from pathlib import Path
from unittest.mock import Mock, MagicMock


@pytest.fixture
def temp_dir():
"""Create a temporary directory for test files."""
temp_dir = tempfile.mkdtemp()
yield Path(temp_dir)
shutil.rmtree(temp_dir)


@pytest.fixture
def mock_config():
"""Mock configuration object for testing."""
config = Mock()
config.batch_size = 32
config.learning_rate = 0.001
config.epochs = 10
config.hidden_size = 768
config.num_attention_heads = 12
config.num_hidden_layers = 12
config.vocab_size = 30522
config.max_position_embeddings = 512
config.intermediate_size = 3072
config.hidden_act = "gelu"
config.hidden_dropout_prob = 0.1
config.attention_probs_dropout_prob = 0.1
return config


@pytest.fixture
def mock_model():
"""Mock model object for testing."""
model = MagicMock()
model.predict = MagicMock(return_value=[0.1, 0.9])
model.train = MagicMock()
model.evaluate = MagicMock(return_value={"loss": 0.5, "accuracy": 0.85})
return model


@pytest.fixture
def sample_text_data():
"""Sample text data for testing."""
return [
"This is a sample sentence for testing.",
"Another example text for our tests.",
"BERT models are powerful for NLP tasks.",
]


@pytest.fixture
def sample_tokenized_data():
"""Sample tokenized data for testing."""
return {
"input_ids": [[101, 2023, 2003, 1037, 6876, 102],
[101, 2178, 2742, 3793, 102],
[101, 14324, 2819, 2024, 3928, 102]],
"attention_mask": [[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1]],
"token_type_ids": [[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0]]
}


@pytest.fixture
def mock_data_loader():
"""Mock data loader for testing."""
loader = Mock()
loader.__iter__ = Mock(return_value=iter([
{"inputs": "sample1", "labels": 0},
{"inputs": "sample2", "labels": 1},
]))
loader.__len__ = Mock(return_value=2)
return loader


@pytest.fixture
def capture_stdout(monkeypatch):
"""Capture stdout for testing print statements."""
import io
import sys

captured_output = io.StringIO()
monkeypatch.setattr(sys, 'stdout', captured_output)
yield captured_output
captured_output.close()


@pytest.fixture(autouse=True)
def reset_random_seeds():
"""Reset random seeds for reproducible tests."""
import random

random.seed(42)

try:
import numpy as np
np.random.seed(42)
except ImportError:
pass

try:
import torch
torch.manual_seed(42)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(42)
except ImportError:
pass
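
As a usage illustration only (not part of this PR), the conftest fixtures compose in a test module along these lines; the module path tests/unit/test_fixture_usage.py and the test names are hypothetical:

import pytest


@pytest.mark.unit
def test_mock_model_predict(mock_model, sample_tokenized_data):
    # The mocked predict() ignores its arguments and returns two scores.
    scores = mock_model.predict(sample_tokenized_data["input_ids"])
    assert scores == [0.1, 0.9]


@pytest.mark.unit
def test_temp_dir_is_writable(temp_dir):
    # temp_dir is a pathlib.Path that is deleted after the test finishes.
    sentinel = temp_dir / "sentinel.txt"
    sentinel.write_text("ok")
    assert sentinel.read_text() == "ok"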
Empty file added tests/integration/__init__.py
Empty file.
63 changes: 63 additions & 0 deletions tests/test_infrastructure_validation.py
@@ -0,0 +1,63 @@
import pytest
from pathlib import Path


class TestInfrastructureValidation:
"""Validation tests to ensure the testing infrastructure is properly set up."""

def test_pytest_is_working(self):
"""Basic test to verify pytest is running."""
assert True

def test_fixtures_are_available(self, temp_dir, mock_config, mock_model):
"""Test that our custom fixtures are working."""
assert isinstance(temp_dir, Path)
assert temp_dir.exists()

assert hasattr(mock_config, 'batch_size')
assert mock_config.batch_size == 32

assert hasattr(mock_model, 'predict')
result = mock_model.predict()
assert result == [0.1, 0.9]

def test_markers_are_defined(self):
"""Test that custom markers are available."""
markers = pytest.mark._markers
assert 'unit' in markers or hasattr(pytest.mark, 'unit')
assert 'integration' in markers or hasattr(pytest.mark, 'integration')
assert 'slow' in markers or hasattr(pytest.mark, 'slow')

@pytest.mark.unit
def test_unit_marker(self):
"""Test with unit marker."""
assert 1 + 1 == 2

@pytest.mark.integration
def test_integration_marker(self):
"""Test with integration marker."""
assert "integration" in "integration test"

def test_project_structure_exists(self):
"""Verify the project structure is as expected."""
project_root = Path(__file__).parent.parent

assert project_root.exists()
assert (project_root / "model").exists()
assert (project_root / "tests").exists()
assert (project_root / "tests" / "unit").exists()
assert (project_root / "tests" / "integration").exists()
assert (project_root / "pyproject.toml").exists()

def test_coverage_is_configured(self):
"""Test that coverage is properly configured."""
project_root = Path(__file__).parent.parent
pyproject_path = project_root / "pyproject.toml"

assert pyproject_path.exists()

with open(pyproject_path, 'r') as f:
content = f.read()
assert "[tool.coverage.run]" in content
assert "[tool.pytest.ini_options]" in content
assert "--cov" in content
Empty file added tests/unit/__init__.py
Empty file.