Mini-ImageNet / params.yaml
ImAMJayKIM's picture
Upload 96 files
c1596ac verified
# Project identity.
project:
  name: imagenet-project
# Dataset locations, dataset version tag and train/val/test split ratios.
data:
  raw_dir: ./data/raw/
  captions_file: ./data/captioning/annotations/train.json
  dataset_version: cls_raw-20260525-v2
  # Alternative dataset version (disabled):
  # dataset_version: raw-20260509-v1
  # NOTE(review): the original indentation was lost; `split` is placed under
  # `data` by inference -- confirm against the code that reads this file.
  split:
    # The three ratios sum to 1.0.
    train_ratio: 0.7
    val_ratio: 0.15
    test_ratio: 0.15
# Training settings: random seed, epoch count, batch size, worker count,
# device and optimizer name.
train:
  seed: 42
  # Alternative seeds for repeated experiments (disabled):
  # seed: 7
  # seed: 21
  epochs: 20
  batch_size: 32
  num_workers: 4
  device: cuda
  optimizer: adam
# Image preprocessing: target image size and whether to normalize.
preprocess:
  image_size: 224
  normalize: true
# Loss function selection.
loss:
  name: cross_entropy
  # NOTE(review): symbolic string rather than an integer index; presumably
  # resolved to the padding-token id by the consumer -- confirm.
  ignore_index: pad_token
# Evaluation settings: batch size and the list of metric names to report.
evaluate:
  batch_size: 32
  metrics:
    - bleu
    - rouge_l
    - meteor
# Experiment logging: W&B toggle, W&B project name and logging interval.
logging:
  use_wandb: true
  project_name: imagenet-project
  # NOTE(review): unit of the interval (steps vs. batches) is not visible
  # here -- confirm with the training loop.
  log_interval: 10
# Base directory for generated outputs (relative path).
outputs:
  base_dir: outputs
# Demo server settings: bind address, port, sharing toggle, number of top
# predictions, Grad-CAM toggle and the class-label list.
demo:
  # Quoted so the address is unambiguously a string.
  host: '0.0.0.0'
  port: 7860
  share: false
  top_k: 5
  show_gradcam: true
  # 50 class labels; the original order is preserved exactly.
  # NOTE(review): the original indentation was lost; `class_names` is placed
  # under `demo` by inference (it could be a top-level key), and the order
  # presumably maps to model output indices -- confirm both with the consumer.
  class_names: [airplane, apple, aster, banana, bicycle, bracelet, bulldog,
    bus, butterfly, car, carrot, cucumber, cup-cake, daisy, dandelion,
    dumpling, earrings, elephant, glasses, golden-retriever, hamburger,
    horse, iris, lavender, lily, marigold, motorcycle, necklace, orange,
    orchid, pants, pasta, penguin, persian-cat, pizza, rose, salad,
    sandwich, sheep, siamese-cat, sneakers, squirrel, steak, strawberry,
    sunflower, sushi, tomato, t-shirt, tulip, waffle]
# CNN feature-extractor settings: backbone name, pretrained/frozen toggles,
# output dimension, dropout rate and pooling type.
cnn:
  backbone: resnet18
  pretrained: true
  freeze: true
  output_dim: 512
  dropout: 0.3
  pooling: avg
# Image-captioning experiment: encoder/decoder selection, training
# hyperparameters, per-decoder settings, data paths, tokenizer, checkpoints,
# attention heatmaps, LR scheduler and beam search.
captioning:
  # Active encoder; alternatives are kept commented out.
  # encoder: resnet18
  encoder: swin
  # encoder: vit
  # Active decoder; alternatives are kept commented out.
  decoder: transformer
  # decoder: lstm
  # decoder: gru
  version: final
  epochs: 25
  learning_rate: 0.0001
  batch_size: 32
  optimizer: adamw
  max_caption_length: 30
  train_num_caption: 2
  debug: false
  # Settings for the `lstm` decoder option.
  lstm:
    embed_dim: 256
    hidden_dim: 512
    num_layers: 1
  # Settings for the `gru` decoder option.
  gru:
    embed_dim: 256
    hidden_dim: 512
    num_layers: 1
  # Settings for the `transformer` decoder option.
  transformer:
    n_layers: 6
    nhead: 8
    d_model: 512
    drop_p: 0.3
    # NOTE(review): the original indentation was lost; `label_smoothing` and
    # `weight_decay` are placed under `transformer` by inference (they could
    # belong directly to `captioning`) -- confirm with the consumer.
    label_smoothing: 0
    weight_decay: 0.001
  # Captioning dataset version plus image directories and caption files for
  # the train / val / test splits.
  data:
    dataset_version: cap_raw-20260524-v1
    train_img: ./data/captioning/raw/train/
    train_caption: ./data/captioning/annotations/train.json
    val_img: ./data/captioning/raw/val/
    val_caption: ./data/captioning/annotations/val.json
    test_img: ./data/captioning/raw/test/
    test_caption: ./data/captioning/annotations/test.json
  tokenizer:
    min_freq: 3
    max_vocab_size: 10000
    sp_vocab_size: 2000
    use_subword: false
    sp_model_path: ./src/dataset/sub_tokenizer2000.model
  checkpoint:
    # NOTE(review): relative path here, while the heatmap directories below
    # are absolute under /workspace -- confirm both resolve as intended.
    save_dir: ./outputs/captioning
    final_checkpoint: swin-transformer_final_best.pt
    resume: false
  heatmap:
    # NOTE(review): both directories have the same value -- confirm that the
    # two heatmap kinds are meant to share one output directory.
    dec_atten_dir: /workspace/outputs/captioning/heatmap/
    enc_dec_atten_dir: /workspace/outputs/captioning/heatmap/
    layer: 6  # which layer
    sample: [0, 410, 820, 1230, 1640]  # which samples (batch) for caption & heatmap
  scheduler:
    use_scheduler: false
    warmup_step: 500
    lr_scale: 0.5
  beam_search:
    use_beam_search: true
    beam_size: 3
# Image-classification experiment: model selection, final checkpoint,
# optimizer hyperparameters, scheduler, augmentation, label smoothing,
# reported metrics and checkpoint directory.
classification:
  # Active model; alternatives are kept commented out.
  # model_name: resnet18
  # model_name: efficientnet_b0
  # model_name: convnext_tiny
  # model_name: mobilenet_v3_small
  # model_name: vit_b_16
  model_name: swin_t
  # model_name: deit_tiny_patch16_224
  # NOTE(review): relative path here, while `checkpoint.save_dir` below is
  # absolute under /workspace -- confirm both point at the same directory.
  final_checkpoint: ./outputs/classification/cls_swin-t_base_cls_raw-20260525-v2_lr-0005_bs-32_adamw_none_wdc-0.05_ls-0.0_best.pth
  epochs: 50
  # Two learning rates keyed by `cnn` / `transformer`.
  # NOTE(review): presumably selected by the family of `model_name` -- confirm.
  learning_rate:
    # baseline
    cnn: 0.001
    transformer: 0.0005
    # hyperparameter tuning
    # cnn: 0.0005
    # transformer: 0.0001
  # optimizer: adam
  # optimizer: sgd
  optimizer: adamw
  # default
  # weight_decay: 0.01
  # tuning
  weight_decay: 0.05
  scheduler:
    use: false
    # use: true
    # name: cosineannealinglr
  augmentation:
    # baseline
    use_aug: false
    # Plain scalar `none` is the string "none", not a YAML null.
    type: none
    # mixup
    # use_aug: true
    # type: mixup
    # cutmix
    # use_aug: true
    # type: cutmix
  # NOTE(review): the original indentation was lost; `label_smoothing` is kept
  # at the `classification` level by inference (it could belong to
  # `augmentation`) -- confirm with the consumer.
  label_smoothing: 0.0
  # label smoothing experiment
  # label_smoothing: 0.05
  # label_smoothing: 0.1
  # Metric names listed per phase.
  metrics:
    train:
      - loss
      - accuracy
    validation:
      - loss
      - accuracy
      - macro_f1
    final_test:
      - accuracy
      - macro_f1
      - precision
      - recall
      - confusion_matrix
  checkpoint:
    save_dir: /workspace/outputs/classification
# Latent-space visualization: input data and checkpoint, output directory and
# file names, data-loading/runtime settings, W&B options and UMAP parameters.
latent_space:
  data_dir: /workspace/data/raw
  checkpoint: /workspace/outputs/classification/cls_swin-t_base_cls_raw-20260525-v2_lr-0005_bs-32_adamw_none_wdc-0.05_ls-0.0_best.pth
  output_dir: /workspace/outputs/latent_space
  # Output names carry no file extension here.
  # NOTE(review): the `nb10_md05` suffix appears to mirror
  # `umap.n_neighbors: 10` and `umap.min_dist: 0.5` below -- keep them in sync
  # when changing the UMAP parameters.
  output_umap_npy: cls_swin-t_best_umap_2d_test_nb10_md05
  output_umap_png: cls_swin-t_best_umap_plt_test_nb10_md05
  output_meta_csv: cls_swin-t_best_metadata_test_nb10_md05
  split: test
  batch_size: 32
  num_workers: 4
  device: cuda
  seed: 42
  save_meta: true
  use_wandb: true
  wandb_name: latent_space_umap
  umap:
    n_neighbors: 10
    min_dist: 0.5
    metric: cosine