Spaces:
Running
Running
| project: | |
| name: imagenet-project | |
| data: | |
| raw_dir: ./data/raw/ | |
| captions_file: ./data/captioning/annotations/train.json | |
| dataset_version: cls_raw-20260525-v2 | |
| # dataset_version: raw-20260509-v1 | |
| split: | |
| train_ratio: 0.7 | |
| val_ratio: 0.15 | |
| test_ratio: 0.15 | |
| train: | |
| seed: 42 | |
| # repeated experiment | |
| # seed: 7 | |
| # seed: 21 | |
| epochs: 20 | |
| batch_size: 32 | |
| num_workers: 4 | |
| device: cuda | |
| optimizer: adam | |
| preprocess: | |
| image_size: 224 | |
| normalize: true | |
| loss: | |
| name: cross_entropy | |
| ignore_index: pad_token | |
| evaluate: | |
| batch_size: 32 | |
| metrics: | |
| - bleu | |
| - rouge_l | |
| - meteor | |
| logging: | |
| use_wandb: true | |
| project_name: imagenet-project | |
| log_interval: 10 | |
| outputs: | |
| base_dir: outputs | |
| demo: | |
| host: 0.0.0.0 | |
| port: 7860 | |
| share: false | |
| top_k: 5 | |
| show_gradcam: true | |
| class_names : [airplane, apple, aster, banana, bicycle, bracelet, bulldog, bus, butterfly, car, carrot, cucumber, cup-cake, daisy, dandelion, dumpling, earrings, elephant, glasses, golden-retriever, hamburger, horse, iris, lavender, lily, marigold, motorcycle, necklace, orange, orchid, pants, pasta, penguin, persian-cat, pizza, rose, salad, sandwich, sheep, siamese-cat, sneakers, squirrel, steak, strawberry, sunflower, sushi, tomato, t-shirt, tulip, waffle] | |
| cnn: | |
| backbone: resnet18 | |
| pretrained: true | |
| freeze: true | |
| output_dim: 512 | |
| dropout: 0.3 | |
| pooling: avg | |
| captioning: | |
| # encoder: resnet18 | |
| encoder: swin | |
| # encoder: vit | |
| decoder: transformer | |
| # decoder: lstm | |
| # decoder: gru | |
| version: final | |
| epochs: 25 | |
| learning_rate: 0.0001 | |
| batch_size: 32 | |
| optimizer: adamw | |
| max_caption_length: 30 | |
| train_num_caption: 2 | |
| debug: False | |
| lstm: | |
| embed_dim: 256 | |
| hidden_dim: 512 | |
| num_layers: 1 | |
| gru: | |
| embed_dim: 256 | |
| hidden_dim: 512 | |
| num_layers: 1 | |
| transformer: | |
| n_layers: 6 | |
| nhead: 8 | |
| d_model: 512 | |
| drop_p: 0.3 | |
| label_smoothing: 0 | |
| weight_decay: 0.001 | |
| data: | |
| dataset_version: cap_raw-20260524-v1 | |
| train_img: ./data/captioning/raw/train/ | |
| train_caption: ./data/captioning/annotations/train.json | |
| val_img: ./data/captioning/raw/val/ | |
| val_caption: ./data/captioning/annotations/val.json | |
| test_img: ./data/captioning/raw/test/ | |
| test_caption: ./data/captioning/annotations/test.json | |
| tokenizer: | |
| min_freq: 3 | |
| max_vocab_size: 10000 | |
| sp_vocab_size: 2000 | |
| use_subword: False | |
| sp_model_path: ./src/dataset/sub_tokenizer2000.model | |
| checkpoint: | |
| save_dir: ./outputs/captioning | |
| final_checkpoint: swin-transformer_final_best.pt | |
| resume: False | |
| heatmap: | |
| dec_atten_dir: /workspace/outputs/captioning/heatmap/ | |
| enc_dec_atten_dir: /workspace/outputs/captioning/heatmap/ | |
| layer: 6 # which layer (by position) | |
| sample: [0, 410, 820, 1230, 1640] # which samples (batch) to use for caption & heatmap | |
| scheduler: | |
| use_scheduler: False | |
| warmup_step: 500 | |
| lr_scale: 0.5 | |
| beam_search: | |
| use_beam_search: True | |
| beam_size: 3 | |
| classification: | |
| # model_name: resnet18 | |
| # model_name: efficientnet_b0 | |
| # model_name: convnext_tiny | |
| # model_name: mobilenet_v3_small | |
| # model_name: vit_b_16 | |
| model_name: swin_t | |
| # model_name: deit_tiny_patch16_224 | |
| final_checkpoint: ./outputs/classification/cls_swin-t_base_cls_raw-20260525-v2_lr-0005_bs-32_adamw_none_wdc-0.05_ls-0.0_best.pth | |
| epochs: 50 | |
| learning_rate: | |
| # baseline | |
| cnn: 0.001 | |
| transformer: 0.0005 | |
| # hyperparameter tuning | |
| # cnn: 0.0005 | |
| # transformer: 0.0001 | |
| # optimizer: adam | |
| # optimizer: sgd | |
| optimizer: adamw | |
| # default | |
| # weight_decay: 0.01 | |
| # tuning | |
| weight_decay: 0.05 | |
| scheduler: | |
| use: false | |
| # use: true | |
| # name: cosineannealinglr | |
| augmentation: | |
| # baseline | |
| use_aug: false | |
| type: none | |
| # mixup | |
| # use_aug: true | |
| # type: mixup | |
| # cutmix | |
| # use_aug: true | |
| # type: cutmix | |
| label_smoothing: 0.0 | |
| # label smoothing experiment | |
| # label_smoothing: 0.05 | |
| # label_smoothing: 0.1 | |
| metrics: | |
| train: | |
| - loss | |
| - accuracy | |
| validation: | |
| - loss | |
| - accuracy | |
| - macro_f1 | |
| final_test: | |
| - accuracy | |
| - macro_f1 | |
| - precision | |
| - recall | |
| - confusion_matrix | |
| checkpoint: | |
| save_dir: /workspace/outputs/classification | |
| latent_space: | |
| data_dir: /workspace/data/raw | |
| checkpoint: /workspace/outputs/classification/cls_swin-t_base_cls_raw-20260525-v2_lr-0005_bs-32_adamw_none_wdc-0.05_ls-0.0_best.pth | |
| output_dir: /workspace/outputs/latent_space | |
| output_umap_npy: cls_swin-t_best_umap_2d_test_nb10_md05 | |
| output_umap_png: cls_swin-t_best_umap_plt_test_nb10_md05 | |
| output_meta_csv: cls_swin-t_best_metadata_test_nb10_md05 | |
| split: test | |
| batch_size: 32 | |
| num_workers: 4 | |
| device: cuda | |
| seed: 42 | |
| save_meta: true | |
| use_wandb: true | |
| wandb_name: latent_space_umap | |
| umap: | |
| n_neighbors: 10 | |
| min_dist: 0.5 | |
| metric: cosine | |