| | import functools |
| | from collections import OrderedDict |
| |
|
| | import gradio as gr |
| |
|
| | from modules import shared |
| |
|
| | loaders_and_params = OrderedDict({ |
| | 'Transformers': [ |
| | 'cpu_memory', |
| | 'gpu_memory', |
| | 'load_in_8bit', |
| | 'bf16', |
| | 'cpu', |
| | 'disk', |
| | 'auto_devices', |
| | 'load_in_4bit', |
| | 'use_double_quant', |
| | 'quant_type', |
| | 'compute_dtype', |
| | 'trust_remote_code', |
| | 'use_fast', |
| | 'use_flash_attention_2', |
| | 'alpha_value', |
| | 'rope_freq_base', |
| | 'compress_pos_emb', |
| | 'disable_exllama', |
| | 'transformers_info' |
| | ], |
| | 'ExLlama_HF': [ |
| | 'gpu_split', |
| | 'max_seq_len', |
| | 'alpha_value', |
| | 'rope_freq_base', |
| | 'compress_pos_emb', |
| | 'cfg_cache', |
| | 'use_fast', |
| | 'exllama_HF_info', |
| | ], |
| | 'ExLlamav2_HF': [ |
| | 'gpu_split', |
| | 'max_seq_len', |
| | 'cfg_cache', |
| | 'no_flash_attn', |
| | 'cache_8bit', |
| | 'alpha_value', |
| | 'compress_pos_emb', |
| | 'use_fast', |
| | ], |
| | 'ExLlama': [ |
| | 'gpu_split', |
| | 'max_seq_len', |
| | 'alpha_value', |
| | 'rope_freq_base', |
| | 'compress_pos_emb', |
| | 'exllama_info', |
| | ], |
| | 'ExLlamav2': [ |
| | 'gpu_split', |
| | 'max_seq_len', |
| | 'no_flash_attn', |
| | 'cache_8bit', |
| | 'alpha_value', |
| | 'compress_pos_emb', |
| | ], |
| | 'AutoGPTQ': [ |
| | 'triton', |
| | 'no_inject_fused_attention', |
| | 'no_inject_fused_mlp', |
| | 'no_use_cuda_fp16', |
| | 'wbits', |
| | 'groupsize', |
| | 'desc_act', |
| | 'disable_exllama', |
| | 'gpu_memory', |
| | 'cpu_memory', |
| | 'cpu', |
| | 'disk', |
| | 'auto_devices', |
| | 'trust_remote_code', |
| | 'use_fast', |
| | 'autogptq_info', |
| | ], |
| | 'GPTQ-for-LLaMa': [ |
| | 'wbits', |
| | 'groupsize', |
| | 'model_type', |
| | 'pre_layer', |
| | 'use_fast', |
| | 'gptq_for_llama_info', |
| | ], |
| | 'llama.cpp': [ |
| | 'n_ctx', |
| | 'n_gpu_layers', |
| | 'tensor_split', |
| | 'n_batch', |
| | 'threads', |
| | 'threads_batch', |
| | 'no_mmap', |
| | 'mlock', |
| | 'no_mul_mat_q', |
| | 'llama_cpp_seed', |
| | 'alpha_value', |
| | 'rope_freq_base', |
| | 'compress_pos_emb', |
| | 'cpu', |
| | 'numa', |
| | ], |
| | 'llamacpp_HF': [ |
| | 'n_ctx', |
| | 'n_gpu_layers', |
| | 'tensor_split', |
| | 'n_batch', |
| | 'threads', |
| | 'threads_batch', |
| | 'no_mmap', |
| | 'mlock', |
| | 'no_mul_mat_q', |
| | 'alpha_value', |
| | 'rope_freq_base', |
| | 'compress_pos_emb', |
| | 'cpu', |
| | 'numa', |
| | 'cfg_cache', |
| | 'use_fast', |
| | 'logits_all', |
| | 'llamacpp_HF_info', |
| | ], |
| | 'ctransformers': [ |
| | 'n_ctx', |
| | 'n_gpu_layers', |
| | 'n_batch', |
| | 'threads', |
| | 'model_type', |
| | 'no_mmap', |
| | 'mlock' |
| | ], |
| | 'AutoAWQ': [ |
| | 'cpu_memory', |
| | 'gpu_memory', |
| | 'auto_devices', |
| | 'max_seq_len', |
| | 'no_inject_fused_attention', |
| | 'trust_remote_code', |
| | 'use_fast', |
| | ] |
| | }) |
| |
|
| | loaders_samplers = { |
| | 'Transformers': { |
| | 'temperature', |
| | 'temperature_last', |
| | 'top_p', |
| | 'min_p', |
| | 'top_k', |
| | 'typical_p', |
| | 'epsilon_cutoff', |
| | 'eta_cutoff', |
| | 'tfs', |
| | 'top_a', |
| | 'repetition_penalty', |
| | 'presence_penalty', |
| | 'frequency_penalty', |
| | 'repetition_penalty_range', |
| | 'encoder_repetition_penalty', |
| | 'no_repeat_ngram_size', |
| | 'min_length', |
| | 'seed', |
| | 'do_sample', |
| | 'penalty_alpha', |
| | 'num_beams', |
| | 'length_penalty', |
| | 'early_stopping', |
| | 'mirostat_mode', |
| | 'mirostat_tau', |
| | 'mirostat_eta', |
| | 'grammar_file_row', |
| | 'grammar_string', |
| | 'guidance_scale', |
| | 'negative_prompt', |
| | 'ban_eos_token', |
| | 'custom_token_bans', |
| | 'add_bos_token', |
| | 'skip_special_tokens', |
| | 'auto_max_new_tokens', |
| | }, |
| | 'ExLlama_HF': { |
| | 'temperature', |
| | 'temperature_last', |
| | 'top_p', |
| | 'min_p', |
| | 'top_k', |
| | 'typical_p', |
| | 'epsilon_cutoff', |
| | 'eta_cutoff', |
| | 'tfs', |
| | 'top_a', |
| | 'repetition_penalty', |
| | 'presence_penalty', |
| | 'frequency_penalty', |
| | 'repetition_penalty_range', |
| | 'encoder_repetition_penalty', |
| | 'no_repeat_ngram_size', |
| | 'min_length', |
| | 'seed', |
| | 'do_sample', |
| | 'mirostat_mode', |
| | 'mirostat_tau', |
| | 'mirostat_eta', |
| | 'grammar_file_row', |
| | 'grammar_string', |
| | 'guidance_scale', |
| | 'negative_prompt', |
| | 'ban_eos_token', |
| | 'custom_token_bans', |
| | 'add_bos_token', |
| | 'skip_special_tokens', |
| | 'auto_max_new_tokens', |
| | }, |
| | 'ExLlama': { |
| | 'temperature', |
| | 'top_p', |
| | 'top_k', |
| | 'typical_p', |
| | 'repetition_penalty', |
| | 'repetition_penalty_range', |
| | 'seed', |
| | 'guidance_scale', |
| | 'negative_prompt', |
| | 'ban_eos_token', |
| | 'add_bos_token', |
| | 'custom_token_bans', |
| | 'auto_max_new_tokens', |
| | }, |
| | 'ExLlamav2': { |
| | 'temperature', |
| | 'top_p', |
| | 'top_k', |
| | 'typical_p', |
| | 'repetition_penalty', |
| | 'repetition_penalty_range', |
| | 'seed', |
| | 'ban_eos_token', |
| | 'add_bos_token', |
| | 'custom_token_bans', |
| | 'skip_special_tokens', |
| | 'auto_max_new_tokens', |
| | }, |
| | 'ExLlamav2_HF': { |
| | 'temperature', |
| | 'temperature_last', |
| | 'top_p', |
| | 'min_p', |
| | 'top_k', |
| | 'typical_p', |
| | 'epsilon_cutoff', |
| | 'eta_cutoff', |
| | 'tfs', |
| | 'top_a', |
| | 'repetition_penalty', |
| | 'presence_penalty', |
| | 'frequency_penalty', |
| | 'repetition_penalty_range', |
| | 'encoder_repetition_penalty', |
| | 'no_repeat_ngram_size', |
| | 'min_length', |
| | 'seed', |
| | 'do_sample', |
| | 'mirostat_mode', |
| | 'mirostat_tau', |
| | 'mirostat_eta', |
| | 'grammar_file_row', |
| | 'grammar_string', |
| | 'guidance_scale', |
| | 'negative_prompt', |
| | 'ban_eos_token', |
| | 'custom_token_bans', |
| | 'add_bos_token', |
| | 'skip_special_tokens', |
| | 'auto_max_new_tokens', |
| | }, |
| | 'AutoGPTQ': { |
| | 'temperature', |
| | 'temperature_last', |
| | 'top_p', |
| | 'min_p', |
| | 'top_k', |
| | 'typical_p', |
| | 'epsilon_cutoff', |
| | 'eta_cutoff', |
| | 'tfs', |
| | 'top_a', |
| | 'repetition_penalty', |
| | 'presence_penalty', |
| | 'frequency_penalty', |
| | 'repetition_penalty_range', |
| | 'encoder_repetition_penalty', |
| | 'no_repeat_ngram_size', |
| | 'min_length', |
| | 'seed', |
| | 'do_sample', |
| | 'penalty_alpha', |
| | 'num_beams', |
| | 'length_penalty', |
| | 'early_stopping', |
| | 'mirostat_mode', |
| | 'mirostat_tau', |
| | 'mirostat_eta', |
| | 'grammar_file_row', |
| | 'grammar_string', |
| | 'guidance_scale', |
| | 'negative_prompt', |
| | 'ban_eos_token', |
| | 'custom_token_bans', |
| | 'add_bos_token', |
| | 'skip_special_tokens', |
| | 'auto_max_new_tokens', |
| | }, |
| | 'GPTQ-for-LLaMa': { |
| | 'temperature', |
| | 'temperature_last', |
| | 'top_p', |
| | 'min_p', |
| | 'top_k', |
| | 'typical_p', |
| | 'epsilon_cutoff', |
| | 'eta_cutoff', |
| | 'tfs', |
| | 'top_a', |
| | 'repetition_penalty', |
| | 'presence_penalty', |
| | 'frequency_penalty', |
| | 'repetition_penalty_range', |
| | 'encoder_repetition_penalty', |
| | 'no_repeat_ngram_size', |
| | 'min_length', |
| | 'seed', |
| | 'do_sample', |
| | 'penalty_alpha', |
| | 'num_beams', |
| | 'length_penalty', |
| | 'early_stopping', |
| | 'mirostat_mode', |
| | 'mirostat_tau', |
| | 'mirostat_eta', |
| | 'grammar_file_row', |
| | 'grammar_string', |
| | 'guidance_scale', |
| | 'negative_prompt', |
| | 'ban_eos_token', |
| | 'custom_token_bans', |
| | 'add_bos_token', |
| | 'skip_special_tokens', |
| | 'auto_max_new_tokens', |
| | }, |
| | 'llama.cpp': { |
| | 'temperature', |
| | 'top_p', |
| | 'top_k', |
| | 'tfs', |
| | 'repetition_penalty', |
| | 'presence_penalty', |
| | 'frequency_penalty', |
| | 'mirostat_mode', |
| | 'mirostat_tau', |
| | 'mirostat_eta', |
| | 'grammar_file_row', |
| | 'grammar_string', |
| | 'ban_eos_token', |
| | 'custom_token_bans', |
| | }, |
| | 'llamacpp_HF': { |
| | 'temperature', |
| | 'temperature_last', |
| | 'top_p', |
| | 'min_p', |
| | 'top_k', |
| | 'typical_p', |
| | 'epsilon_cutoff', |
| | 'eta_cutoff', |
| | 'tfs', |
| | 'top_a', |
| | 'repetition_penalty', |
| | 'presence_penalty', |
| | 'frequency_penalty', |
| | 'repetition_penalty_range', |
| | 'encoder_repetition_penalty', |
| | 'no_repeat_ngram_size', |
| | 'min_length', |
| | 'seed', |
| | 'do_sample', |
| | 'mirostat_mode', |
| | 'mirostat_tau', |
| | 'mirostat_eta', |
| | 'grammar_file_row', |
| | 'grammar_string', |
| | 'guidance_scale', |
| | 'negative_prompt', |
| | 'ban_eos_token', |
| | 'custom_token_bans', |
| | 'add_bos_token', |
| | 'skip_special_tokens', |
| | 'auto_max_new_tokens', |
| | }, |
| | 'ctransformers': { |
| | 'temperature', |
| | 'top_p', |
| | 'top_k', |
| | 'repetition_penalty', |
| | 'repetition_penalty_range', |
| | }, |
| | 'AutoAWQ': { |
| | 'temperature', |
| | 'temperature_last', |
| | 'top_p', |
| | 'min_p', |
| | 'top_k', |
| | 'typical_p', |
| | 'epsilon_cutoff', |
| | 'eta_cutoff', |
| | 'tfs', |
| | 'top_a', |
| | 'repetition_penalty', |
| | 'presence_penalty', |
| | 'frequency_penalty', |
| | 'repetition_penalty_range', |
| | 'encoder_repetition_penalty', |
| | 'no_repeat_ngram_size', |
| | 'min_length', |
| | 'seed', |
| | 'do_sample', |
| | 'penalty_alpha', |
| | 'num_beams', |
| | 'length_penalty', |
| | 'early_stopping', |
| | 'mirostat_mode', |
| | 'mirostat_tau', |
| | 'mirostat_eta', |
| | 'grammar_file_row', |
| | 'grammar_string', |
| | 'guidance_scale', |
| | 'negative_prompt', |
| | 'ban_eos_token', |
| | 'custom_token_bans', |
| | 'add_bos_token', |
| | 'skip_special_tokens', |
| | 'auto_max_new_tokens', |
| | }, |
| | } |
| |
|
| | loaders_model_types = { |
| | 'GPTQ-for-LLaMa': [ |
| | "None", |
| | "llama", |
| | "opt", |
| | "gptj" |
| | ], |
| | 'ctransformers': [ |
| | "None", |
| | "gpt2", |
| | "gptj", |
| | "gptneox", |
| | "llama", |
| | "mpt", |
| | "dollyv2", |
| | "replit", |
| | "starcoder", |
| | "gptbigcode", |
| | "falcon" |
| | ], |
| | } |
| |
|
| |
|
| | @functools.cache |
| | def list_all_samplers(): |
| | all_samplers = set() |
| | for k in loaders_samplers: |
| | for sampler in loaders_samplers[k]: |
| | all_samplers.add(sampler) |
| |
|
| | return sorted(all_samplers) |
| |
|
| |
|
| | def blacklist_samplers(loader): |
| | all_samplers = list_all_samplers() |
| | if loader == 'All': |
| | return [gr.update(visible=True) for sampler in all_samplers] |
| | else: |
| | return [gr.update(visible=True) if sampler in loaders_samplers[loader] else gr.update(visible=False) for sampler in all_samplers] |
| |
|
| |
|
| | def get_model_types(loader): |
| | if loader in loaders_model_types: |
| | return loaders_model_types[loader] |
| |
|
| | return ["None"] |
| |
|
| |
|
| | def get_gpu_memory_keys(): |
| | return [k for k in shared.gradio if k.startswith('gpu_memory')] |
| |
|
| |
|
| | @functools.cache |
| | def get_all_params(): |
| | all_params = set() |
| | for k in loaders_and_params: |
| | for el in loaders_and_params[k]: |
| | all_params.add(el) |
| |
|
| | if 'gpu_memory' in all_params: |
| | all_params.remove('gpu_memory') |
| | for k in get_gpu_memory_keys(): |
| | all_params.add(k) |
| |
|
| | return sorted(all_params) |
| |
|
| |
|
| | def make_loader_params_visible(loader): |
| | params = [] |
| | all_params = get_all_params() |
| | if loader in loaders_and_params: |
| | params = loaders_and_params[loader] |
| |
|
| | if 'gpu_memory' in params: |
| | params.remove('gpu_memory') |
| | params += get_gpu_memory_keys() |
| |
|
| | return [gr.update(visible=True) if k in params else gr.update(visible=False) for k in all_params] |
| |
|