Instructions to use kernels-community/quantization-eetq with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Kernels
How to use kernels-community/quantization-eetq with Kernels:
# !pip install kernels from kernels import get_kernel kernel = get_kernel("kernels-community/quantization-eetq") - Notebooks
- Google Colab
- Kaggle
| /* | |
| * Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. | |
| * | |
| * Licensed under the Apache License, Version 2.0 (the "License"); | |
| * you may not use this file except in compliance with the License. | |
| * You may obtain a copy of the License at | |
| * | |
| * http://www.apache.org/licenses/LICENSE-2.0 | |
| * | |
| * Unless required by applicable law or agreed to in writing, software | |
| * distributed under the License is distributed on an "AS IS" BASIS, | |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| * See the License for the specific language governing permissions and | |
| * limitations under the License. | |
| */ | |
| namespace fastertransformer { | |
| std::vector<CutlassGemmConfig> get_candidate_configs(int sm, const bool is_weight_only, const bool simt_configs_only); | |
| CutlassGemmConfig estimate_best_config_from_occupancies(const std::vector<CutlassGemmConfig>& candidate_configs, | |
| const std::vector<int>& occupancies, | |
| const int64_t m, | |
| const int64_t n, | |
| const int64_t k, | |
| const int64_t num_experts, | |
| const int split_k_limit, | |
| const size_t workspace_bytes, | |
| const int multi_processor_count, | |
| const int is_weight_only); | |
| } // namespace fastertransformer | |