# Expose the CUDA toolkit installed under /usr/local/cuda to this shell.
#
# The ${VAR:+${VAR}:} form emits the ":" separator only when the variable
# already holds a value. A plain "$VAR:/dir" yields a leading ":" when VAR is
# unset or empty, i.e. an empty search-path entry, which is interpreted as the
# current working directory.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/lib64"
# Toolkit root directory.
export CUDA_HOME=/usr/local/cuda/
# Append the toolkit's executables directory to the command search path.
export PATH="${PATH:+${PATH}:}/usr/local/cuda/bin"
# Configure and build with the CUDA backend enabled (GGML_CUDA=ON), then print
# the server's help text.
#
# CUDA_ARCHS is the CMAKE_CUDA_ARCHITECTURES list; set it in the environment
# before running to pick the target GPU. Values recorded in these notes:
#   3080Ti: 86
#   5090D:  86;89
# The default is "86;89": the two configure commands used to run back-to-back
# against the same build directory, so only this second value took effect.
# NOTE(review): 86 and 89 are the Ampere/Ada targets; RTX 5090-class cards
# report compute capability 12.0, so "120" (needs a recent CUDA toolkit) may be
# what the 5090D entry should list -- confirm before relying on it.
CUDA_ARCHS="${CUDA_ARCHS:-86;89}"
cmake -B build -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHS}"
cmake --build ./build/ --config Release -j 8
# Invoke the freshly built binary by relative path (build/bin is not on PATH),
# and only when the directory change succeeded.
cd ./build/bin/ && ./llama-server -h
# Launch llama-server with the ornith-1.0-9b Q8_0 model.
# The arguments are collected in an array so each option sits on its own line
# without backslash continuations; the resulting argv is unchanged.
ornith_args=(
  # Model file and listen address (all interfaces, port 8099).
  -m /data/ornith-1.0-9b-Q8_0.gguf
  --host 0.0.0.0
  --port 8099
  --flash-attn on
  # Sampling settings.
  --repeat-penalty 1.1
  --reasoning-preserve
  --temperature 0.6
  --top-p 0.95
  --top-k 20
  # q8_0 for both cache-type options.
  -ctk q8_0
  -ctv q8_0
  # 262144-token (256K) context.
  -c 262144
)
./llama-server "${ornith_args[@]}"
存在死循环的情况
# Launch llama-server with the Qwen3.6-35B-A3B APEX-I-Quality model.
# The arguments are collected in an array so each option sits on its own line
# without backslash continuations; the resulting argv is unchanged.
qwen_quality_args=(
  # Model file and listen address (all interfaces, port 8099).
  -m /data/Qwen3.6-35B-A3B-APEX-I-Quality.gguf
  --host 0.0.0.0
  --port 8099
  --flash-attn on
  # Sampling settings.
  --repeat-penalty 1.1
  --reasoning-preserve
  --temperature 0.6
  --top-p 0.95
  --top-k 20
  # q8_0 for both cache-type options.
  -ctk q8_0
  -ctv q8_0
  --split-mode layer
  -b 2048
  --no-mmap
  # Context size / --n-cpu-moe / -ub: the combination that the benchmark notes
  # following this command vary.
  -c 262144
  --n-cpu-moe 32
  -ub 2048
)
./llama-server "${qwen_quality_args[@]}"
256K上下文 -c 262144 --n-cpu-moe 30 -ub 512
3080Ti 提示处理(prefill)409t/s,提示处理耗时52.64s,生成输出38t/s
256K上下文 -c 262144 --n-cpu-moe 32 -ub 2048
3080Ti 提示处理(prefill)1203t/s,提示处理耗时19.96s,生成输出37t/s
192K上下文 -c 196608 --n-cpu-moe 29 -ub 512
3080Ti 提示处理(prefill)419t/s,提示处理耗时51.34s,生成输出39t/s
192K上下文 -c 196608 --n-cpu-moe 30 -ub 2048
3080Ti 提示处理(prefill)1285t/s,提示处理耗时18.89s,生成输出39t/s
160K上下文 -c 163840 --n-cpu-moe 27 -ub 512
3080Ti 提示处理(prefill)442t/s,提示处理耗时48.84s,生成输出40t/s
160K上下文 -c 163840 --n-cpu-moe 29 -ub 2048
3080Ti 提示处理(prefill)1314t/s,提示处理耗时18.52s,生成输出38t/s
128K上下文 -c 131072 --n-cpu-moe 26 -ub 512
3080Ti 提示处理(prefill)456t/s,提示处理耗时47.56s,生成输出42t/s
128K上下文 -c 131072 --n-cpu-moe 27 -ub 2048
3080Ti 提示处理(prefill)1377t/s,提示处理耗时17.68s,生成输出40t/s
5090D 32G:
# Launch llama-server with the Qwen3.6-35B-A3B APEX-I-Balanced model.
# The arguments are collected in an array so each option sits on its own line
# without backslash continuations; the resulting argv is unchanged.
qwen_balanced_args=(
  # Model file and listen address (all interfaces, port 8099).
  -m /data/Qwen3.6-35B-A3B-APEX-I-Balanced.gguf
  --host 0.0.0.0
  --port 8099
  --flash-attn on
  # Sampling settings.
  --repeat-penalty 1.1
  --reasoning-preserve
  --temperature 0.6
  --top-p 0.95
  --top-k 20
  # q8_0 for both cache-type options.
  -ctk q8_0
  -ctv q8_0
  --split-mode layer
  -b 2048
  --no-mmap
  # 262144-token (256K) context with --n-cpu-moe set to 0.
  -c 262144
  --n-cpu-moe 0
  -ub 2048
)
./llama-server "${qwen_balanced_args[@]}"
提示处理(prefill)9138t/s,提示处理耗时0.5s,生成输出163t/s
设置Qt路径:
CMakeLists.txt
# Root of the Qt 5.10.1 MSVC 2015 kit; every path below is derived from it.
set(CMAKE_PREFIX_PATH "D:/Qt/Qt5.10.1/5.10.1/msvc2015")
# Per-package config directories, each following lib/cmake/<PackageName>.
set(Qt5_DIR "${CMAKE_PREFIX_PATH}/lib/cmake/Qt5")
set(Qt5Widgets_DIR "${CMAKE_PREFIX_PATH}/lib/cmake/Qt5Widgets")
# Previously pointed at lib/cmake/Gui, which does not follow the
# lib/cmake/<PackageName> layout used by every other entry here.
set(Qt5Gui_DIR "${CMAKE_PREFIX_PATH}/lib/cmake/Qt5Gui")
set(Qt5Core_DIR "${CMAKE_PREFIX_PATH}/lib/cmake/Qt5Core")
set(Qt5Network_DIR "${CMAKE_PREFIX_PATH}/lib/cmake/Qt5Network")
set(Qt5LinguistTools_DIR "${CMAKE_PREFIX_PATH}/lib/cmake/Qt5LinguistTools")
# qmake executable shipped in the same kit.
set(QT_QMAKE_EXECUTABLE "${CMAKE_PREFIX_PATH}/bin/qmake.exe")
解决环境问题:
C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\V140\Platforms\Win32\Platform.Default.props
Line 26:
v110改为v140
<PlatformToolset ...>v140</PlatformToolset>
添加编译类型:
.vscode/cmake-variants.json
{
  "buildType": {
    "default": "debug",
    "description": "The type of build",
    "choices": {
      "debug": {
        "short": "Debug",
        "long": "Emit debug information without optimizing",
        "buildType": "Debug"
      },
      "release": {
        "short": "Release",
        "long": "Optimize for speed without debug information",
        "buildType": "Release"
      },
      "release_debug": {
        "short": "Release_Debug",
        "long": "Optimize with optimization and debug symbols for profiling",
        "buildType": "Release_Debug"
      }
    }
  }
}