| | """ |
| | Minimum Functionality Tests (MFT) for Skill Classification Model |
| | |
| | These tests verify that the model performs well on basic, straightforward examples |
| | where the expected output is clear. The model should correctly predict skills for |
| | simple, unambiguous cases. |
| | |
| | Based on Ribeiro et al. (2020), "Beyond Accuracy: Behavioral Testing of NLP Models with CheckList" (ACL 2020). |
| | |
| | Note: Expected labels will vary based on your actual label schema. |
| | These tests use common programming/software engineering skill categories. |
| | """ |
| | import pytest |
| | import numpy as np |
| |
|
| |
|
@pytest.mark.mft
class TestMinimumFunctionality:
    """Minimum-functionality checks on short, unambiguous task descriptions.

    Most tests only require that at least one skill is predicted; they do not
    pin specific labels. ``predict_with_labels`` and ``predict_text`` are
    pytest fixtures defined outside this class; each is called here with a
    single text string.
    """

    @staticmethod
    def _show(subject, predictions):
        """Print ``predictions`` under a heading that names ``subject``."""
        print(f"\nPredictions for {subject}:")
        print(f" {predictions}")

    def _expect_skills(self, predict_with_labels, text, failure_message, subject=None):
        """Predict on ``text``, print the result, and require a non-empty result.

        Args:
            predict_with_labels: Callable taking one text string and returning
                a sized collection of predictions.
            text: Input passed to ``predict_with_labels``.
            failure_message: Assertion message used when nothing is predicted.
            subject: Heading used in the printed output. Defaults to ``text``
                wrapped in single quotes.

        Returns:
            The predictions returned by ``predict_with_labels``.
        """
        predictions = predict_with_labels(text)
        self._show(f"'{text}'" if subject is None else subject, predictions)
        assert len(predictions) > 0, failure_message
        return predictions

    def test_simple_bug_fix(self, predict_with_labels):
        """A one-line bug-fix description should yield at least one skill."""
        self._expect_skills(
            predict_with_labels,
            "Fixed null pointer exception in user authentication",
            "Should predict at least one skill for a bug fix",
        )

    def test_database_work(self, predict_with_labels):
        """A database-related description should yield at least one skill."""
        self._expect_skills(
            predict_with_labels,
            "Implemented SQL query optimization for user table",
            "Should predict skills for database work",
        )

    def test_api_development(self, predict_with_labels):
        """An API development description should yield at least one skill."""
        self._expect_skills(
            predict_with_labels,
            "Created REST API endpoint for retrieving user data",
            "Should predict skills for API development",
        )

    def test_data_structure_implementation(self, predict_with_labels):
        """A data-structure implementation should yield at least one skill."""
        self._expect_skills(
            predict_with_labels,
            "Implemented binary search tree with insert and delete operations",
            "Should predict skills for data structure work",
        )

    def test_testing_work(self, predict_with_labels):
        """A testing-related description should yield at least one skill."""
        self._expect_skills(
            predict_with_labels,
            "Added unit tests for authentication module using JUnit",
            "Should predict skills for testing work",
        )

    def test_frontend_work(self, predict_with_labels):
        """A frontend/UI description should yield at least one skill."""
        self._expect_skills(
            predict_with_labels,
            "Updated user interface with React components for login page",
            "Should predict skills for frontend work",
        )

    def test_security_work(self, predict_with_labels):
        """A security-related description should yield at least one skill."""
        self._expect_skills(
            predict_with_labels,
            "Implemented OAuth2 authentication with password encryption",
            "Should predict skills for security work",
        )

    def test_performance_optimization(self, predict_with_labels):
        """A performance-optimization description should yield at least one skill."""
        self._expect_skills(
            predict_with_labels,
            "Optimized algorithm to reduce time complexity from O(n²) to O(n log n)",
            "Should predict skills for performance work",
        )

    def test_devops_deployment(self, predict_with_labels):
        """A DevOps/deployment description should yield at least one skill."""
        self._expect_skills(
            predict_with_labels,
            "Configured Docker container and CI/CD pipeline for automated deployment",
            "Should predict skills for DevOps work",
        )

    def test_error_handling(self, predict_with_labels):
        """An error-handling description should yield at least one skill."""
        self._expect_skills(
            predict_with_labels,
            "Added try-catch blocks and proper exception handling for file operations",
            "Should predict skills for error handling work",
        )

    def test_refactoring_work(self, predict_with_labels):
        """A refactoring description should yield at least one skill."""
        self._expect_skills(
            predict_with_labels,
            "Refactored legacy code to improve maintainability and readability",
            "Should predict skills for refactoring work",
        )

    def test_documentation_work(self, predict_with_labels):
        """A documentation description should yield at least one skill."""
        self._expect_skills(
            predict_with_labels,
            "Updated API documentation with examples and usage guidelines",
            "Should predict skills for documentation work",
        )

    def test_empty_input(self, predict_with_labels):
        """An empty string must still return a list (no skill count is required)."""
        predictions = predict_with_labels("")

        assert isinstance(predictions, list), "Should return a list for empty input"

    def test_minimal_input(self, predict_with_labels):
        """A one-word input must still return a list (no skill count is required)."""
        text = "bug"
        predictions = predict_with_labels(text)
        self._show(f"minimal input '{text}'", predictions)

        assert isinstance(predictions, list), "Should return a list for minimal input"

    def test_multiple_skills_in_one_task(self, predict_with_labels):
        """A task naming several technologies should yield at least two skills."""
        text = (
            "Implemented user authentication API with JWT tokens, "
            "PostgreSQL database integration, and Redis caching"
        )
        predictions = predict_with_labels(text)
        self._show("multi-skill task", predictions)

        assert len(predictions) >= 2, (
            "Complex multi-technology task should predict multiple skills, "
            f"got {len(predictions)}: {predictions}"
        )

    def test_common_github_issue_format(self, predict_with_labels):
        """A Markdown, GitHub-style issue body should yield at least one skill."""
        # Spelled out line by line so the Markdown text does not pick up the
        # method's source indentation.
        text = (
            "\n"
            "## Description\n"
            "Fixed a bug where the login API was throwing 500 errors\n"
            "\n"
            "## Changes\n"
            "- Added null check in UserService\n"
            "- Improved error handling\n"
            "- Updated unit tests\n"
        )
        self._expect_skills(
            predict_with_labels,
            text,
            "Should predict skills for realistic issue format",
            subject="GitHub-style issue",
        )

    def test_consistency_on_similar_inputs(self, predict_text):
        """Check repeatability on one input and stability under a small rewording.

        The same text is predicted twice and both results must match exactly.
        A paraphrase ("Resolved" instead of "Fixed") must then reach a Jaccard
        similarity (intersection over union of the prediction sets) of at
        least 0.7 against the first result.
        """
        baseline_text = "Fixed authentication bug"
        paraphrased_text = "Resolved authentication bug"

        # Deliberately predict the same text twice to check determinism.
        first = set(predict_text(baseline_text))
        repeat = set(predict_text(baseline_text))
        paraphrased = set(predict_text(paraphrased_text))

        assert first == repeat, "Identical inputs should produce identical predictions"

        combined = first | paraphrased
        if not combined:
            # Both prediction sets are empty, so there is nothing to compare.
            return

        similarity = len(first & paraphrased) / len(combined)
        assert similarity >= 0.7, (
            "Very similar inputs should produce similar predictions. "
            f"Similarity: {similarity:.2f}"
        )
| |
|