File size: 952 Bytes
eef8873
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import logging

from src.data.ingestion import collect_image_paths
from src.data.preprocessing import split_dataset

logger = logging.getLogger(__name__)


def test_preprocessing():
    """Check that ``split_dataset`` yields non-empty, non-overlapping splits.

    Samples are gathered with ``collect_image_paths`` and handed to
    ``split_dataset``. The test then asserts that both the training and the
    validation split contain at least one sample, and that no sample's first
    element (presumably the image path -- confirm against the ingestion
    module) appears in both splits.
    """
    logger.info("Testing preprocessing...")

    train_split, val_split = split_dataset(collect_image_paths())

    assert len(train_split) > 0, "Train split is empty"
    assert len(val_split) > 0, "Validation split is empty"

    # Compare splits by the first element of each sample only.
    train_keys = {sample[0] for sample in train_split}
    val_keys = {sample[0] for sample in val_split}

    assert train_keys.isdisjoint(val_keys), "Train and validation overlap found"

    logger.info("Preprocessing test passed.")


if __name__ == "__main__":
    # Script entry point: emit INFO-level records prefixed with timestamp
    # and level so the test's progress messages are visible on the console.
    log_format = "%(asctime)s - %(levelname)s - %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO)

    test_preprocessing()

    # Reached only if the test raised no AssertionError.
    print("Preprocessing test completed successfully.")