| import torch |
| import gradio as gr |
| from PIL import Image |
| from torchvision import transforms |
| from statistics import mean |
# NOTE(review): assigning None presumably lifts Pillow's pixel-count limit (its
# decompression-bomb guard) so very large images can be opened -- confirm this
# is intended for an app that accepts arbitrary uploads.
Image.MAX_IMAGE_PIXELS = None
|
|
def read_img_file(f):
    """Open ``f`` with PIL and return the image in RGB mode.

    Args:
        f: Anything ``Image.open`` accepts (it is passed through unchanged).

    Returns:
        The opened image itself when its mode is already ``'RGB'``,
        otherwise the result of converting it to ``'RGB'``.
    """
    opened = Image.open(f)
    # Only images in another mode (e.g. with alpha or a palette) need converting.
    return opened if opened.mode == 'RGB' else opened.convert('RGB')
|
|
# Preprocessing that `detect` applies for the "ConvNext Large" and
# "EfficientNet-V2 B0" choices: a random 256x256 crop, conversion to a tensor,
# then per-channel normalization.
# NOTE(review): the mean/std values look like the usual ImageNet statistics --
# confirm they match what the checkpoints were trained with.
_transform_test_random = transforms.Compose(
    [
        transforms.RandomCrop(size=(256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)
|
|
# Preprocessing that `detect` applies for the "EVA-02 ViT L/14" choice: same
# steps as the 256x256 pipeline, but with a random 252x252 crop.
# NOTE(review): 252 is presumably chosen to be a multiple of the ViT patch
# size (14) -- confirm against the model definition.
_transform_test_random_vit = transforms.Compose(
    [
        transforms.RandomCrop(size=(252, 252)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)
|
|
def detect(img, model_choices):
    """Score an image with the selected detector and return a text report.

    The checkpoint for the chosen detector is loaded on the CPU and put in
    eval mode. The image is then scored on ten independent random crops
    (listed individually and averaged) and on one further random crop. Each
    score is the sigmoid of the model's output.

    Args:
        img: Image to analyse; it is passed straight to the crop/normalize
            transform. (The Gradio input is declared with ``type="pil"``.)
        model_choices: Name of the detector to use. One of
            ``"EVA-02 ViT L/14"``, ``"ConvNext Large"`` or
            ``"EfficientNet-V2 B0"``.

    Returns:
        str: Three lines -- the list of ten per-crop scores, their mean
        ("10 try method"), and the single-crop score ("1 try method").

    Raises:
        ValueError: If ``img`` is ``None`` or ``model_choices`` is not one of
            the known detector names.
    """
    # Fail fast with a clear message instead of loading a large checkpoint
    # and then crashing inside the transform.
    if img is None:
        raise ValueError("No input image was provided.")

    if model_choices == "EVA-02 ViT L/14":
        checkpoint_path = "./model_eva.pth"
        _transform = _transform_test_random_vit
    elif model_choices == "ConvNext Large":
        checkpoint_path = "./model_convnext.pth"
        _transform = _transform_test_random
    elif model_choices == "EfficientNet-V2 B0":
        checkpoint_path = "./model_effnet.pth"
        _transform = _transform_test_random
    else:
        # Previously an unknown choice left `model` unbound and surfaced as
        # an UnboundLocalError further down.
        raise ValueError(f"Unknown detector model: {model_choices!r}")

    # The checkpoints hold whole pickled modules (the result is used as a
    # module via .cpu().eval()), so weights_only must be False; PyTorch >= 2.6
    # defaults it to True and would refuse to load them.
    # SECURITY: unpickling can execute arbitrary code -- only load checkpoint
    # files from a trusted source.
    model = torch.load(
        checkpoint_path, map_location="cpu", weights_only=False
    ).cpu().eval()

    with torch.inference_mode():
        # Ten independent random crops; each transform call draws a new crop.
        scores = []
        for _ in range(10):
            crop = _transform(img)
            # Call the module rather than .forward() so registered hooks run.
            probs = torch.sigmoid(model(crop.unsqueeze(0))).cpu().numpy()
            scores.append(probs[0][0])

        # One more independent crop, reported on its own.
        single_crop = _transform(img)
        single = torch.sigmoid(model(single_crop.unsqueeze(0))).cpu().numpy()

    return (
        f"{scores}\n"
        f"10 try method: {mean(scores)}\n"
        f"1 try method: {single}\n"
    )
|
|
# Detector names offered in the UI; the selected string is what `detect`
# receives as its second argument.
model_choices = ["ConvNext Large", "EVA-02 ViT L/14", "EfficientNet-V2 B0"]


# Description text passed to the interface.
descr = """
Detecting AutoEncoder is Enough to Catch LDM Generated Images (https://arxiv.org/abs/2411.06441)
Code at https://github.com/qwertyforce/Detect_LDM_By_Detecting_VAE
Models at https://huggingface.co/qwertyforce/Detect_LDM_By_Detecting_VAE
"""

# One image input plus a radio selector for the detector; plain-text output.
demo = gr.Interface(
    fn=detect,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Radio(
            choices=model_choices,
            type="value",
            value="EVA-02 ViT L/14",
            label="Choose Detector Model",
        ),
    ],
    outputs="text",
    title="Detecting AutoEncoder is Enough to Catch LDM Generated Images",
    description=descr,
)
demo.launch()