[DRAFT] build: build llama.cpp with cygwin on Windows #12215

Draft: wants to merge 2 commits into base: master
Conversation

zhouwg (Contributor) commented on Mar 6, 2025

This draft PR introduces a new approach to building llama.cpp on Windows, one that does not require installing Microsoft's complex IDE.
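For context, here is a minimal sketch of the one-time Cygwin setup this approach assumes; the package list is my assumption of what the build script below needs (cmake, make, gcc/g++), and setup-x86_64.exe with -q/-P is Cygwin's documented unattended installer invocation:

# one-time setup, run from a Windows console in the folder where
# setup-x86_64.exe (downloaded from https://cygwin.com) is located;
# -q = unattended mode, -P = comma-separated list of packages to install
setup-x86_64.exe -q -P cmake,make,gcc-core,gcc-g++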

  1. Official approach to building llama.cpp on Windows

As the official doc states: Please develop/build on Windows for ARM according to the llama.cpp build instructions, section "Building for Windows (x86, x64 and arm64) with MSVC or clang as compilers", with clang as the C/C++ compiler (MSVC is no longer supported for llama.cpp on Windows for ARM because of the Arm CPU Q4_0 optimization inline code).

There are some problems with this approach, and it seems somewhat difficult from the point of view of a Linux programmer (a sketch of the official command-line invocation follows this list):

  • something that is simple on Linux, such as configuring CMake from the command line or a script, is not easy on Windows (because I know nothing about Windows programming)
  • lots of mysterious compile errors from MS's compiler and toolchain for the same llama.cpp-derived project that builds easily for Linux and Android
  • having to install and set up a very big IDE on Windows, and I don't know how this huge IDE works
    (screenshot: Screenshot from 2025-03-03 09-23-27)
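For comparison, here is a rough sketch of the official command-line build (based on the generic llama.cpp build instructions; a sketch, not verbatim from the docs, and exact flags may differ by version):

# generic llama.cpp build with MSVC or clang, run from a shell where
# the compiler toolchain is available
cmake -B build
cmake --build build --config Release
# for Windows on ARM with clang, the repo ships a toolchain file,
# cmake/arm64-windows-llvm.cmake (also referenced in the script below)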
  2. An experimental approach introduced in this PR
    [1.5 of the following 3 steps (x86-windows, x86-windows-qnn, arm64-windows-qnn) have been verified on a 64-bit Windows 10 VM running under VMware Player 17 on a powerful Linux workstation]
#!/bin/bash

set -e

PREFIX_PATH=/cygdrive/c
GGUF_MODEL_NAME=${PREFIX_PATH}/qwen1_5-1_8b-chat-q4_0.gguf

#QNN SDK could be found at:
#https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk
#https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
QNN_SDK_URL=https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk
QNN_SDK_PATH=${PREFIX_PATH}/qairt/2.31.0.250130/

#default is QNN NPU
qnnbackend=2

function dump_vars()
{
    echo -e "QNN_SDK_PATH:         ${QNN_SDK_PATH}"
}


function show_pwd()
{
    echo -e "current working path:$(pwd)\n"
}


function check_qnn_sdk()
{
    if [ ! -d "${QNN_SDK_PATH}" ]; then
        echo -e "QNN_SDK_PATH ${QNN_SDK_PATH} does not exist, please check it or download it from ${QNN_SDK_URL}\n"
        exit 1
    fi
}

function build_windows_x86()
{
    echo "build_windows_x86-without-qnn"
    cmake -S . -B out/windows_x86 -DCMAKE_BUILD_TYPE=Release
    cd out/windows_x86
    make -j16
    show_pwd

    cd -
}

function build_windows_x86_qnn()
{
    echo "build_windows_x86-with-qnn"
    cmake -S . -B out/windows_x86_qnn -DCMAKE_BUILD_TYPE=Release -DGGML_USE_QNN=ON -DGGML_QNN=ON -DGGML_QNN_SDK_PATH=${QNN_SDK_PATH}
    cd out/windows_x86_qnn
    make -j16
    show_pwd

    cd -
}

function build_windows_arm64_qnn()
{
    echo "build_windows_arm64 not supported now"
    #cmake -H. -B./out/windows_arm64_qnn -DCMAKE_BUILD_TYPE=Release -DGGML_USE_QNN=ON -DCMAKE_TOOLCHAIN_FILE=${MSSDK}/cmake/arm64-windows-llvm.cmake -DCMAKE_C_FLAGS=-march=armv8.7-a -DGGML_QNN=ON -DGGML_QNN_SDK_PATH=${QNN_SDK_PATH}
}


function remove_temp_dir()
{
    if [ -d out/windows_x86 ]; then
        echo "remove out/windows_x86 directory in `pwd`"
        rm -rf out/windows_x86
    fi
}


function check_qnn_libs()
{
    echo "do nothing"
}


function update_qnn_libs()
{
    echo "do nothing"
}

function build_x86()
{
    show_pwd
    check_qnn_sdk
    dump_vars
    #some unexpected behaviour on Windows
    #remove_temp_dir
    build_windows_x86
}

function build_x86_qnn()
{
    show_pwd
    check_qnn_sdk
    dump_vars
    #some unexpected behaviour on Windows
    #remove_temp_dir
    build_windows_x86_qnn
}

function build_arm64_qnn()
{
    show_pwd
    check_qnn_sdk
    dump_vars
    #some unexpected behaviour on Windows
    #remove_temp_dir
    build_windows_arm64_qnn
}

function run_llamacli()
{
    check_qnn_libs
    echo "not supported on Windows now"

    #llama-cli -mg ${qnnbackend} -no-cnv -m ${GGUF_MODEL_NAME} -p \"introduce the movie Once Upon a Time in America briefly.\n\"

}


function run_llamabench()
{
    check_qnn_libs
    echo "not supported on Windows now"

    #llama-bench -mg ${qnnbackend} -m ${GGUF_MODEL_NAME}

}


function run_test-backend-ops()
{
    check_qnn_libs
    echo "not supported on Windows now"

    #test-backend-ops test

}


function show_usage()
{
    echo "Usage:"
    echo "  $0 build_x86"
    echo "  $0 build_x86_qnn"
    echo "  $0 build_arm64_qnn"
    echo "  $0 run_testop"
    echo "  $0 run_llamacli     0 (QNN_CPU) / 1 (QNN_GPU) / 2 (QNN_NPU) / 3 (ggml)"
    echo "  $0 run_llamabench   0 (QNN_CPU) / 1 (QNN_GPU) / 2 (QNN_NPU) / 3 (ggml)"
    echo -e "\n\n\n"
}


show_pwd

check_qnn_sdk

if [ $# == 0 ]; then
    show_usage
    exit 1
elif [ $# == 1 ]; then
    if [ "$1" == "-h" ]; then
        show_usage
        exit 1
    elif [ "$1" == "help" ]; then
        show_usage
        exit 1
    elif [ "$1" == "build_x86" ]; then
        build_x86
        exit 0
    elif [ "$1" == "build_x86_qnn" ]; then
        build_x86_qnn
        exit 0
    elif [ "$1" == "build_arm64_qnn" ]; then
        build_arm64_qnn
        exit 0

    elif [ "$1" == "run_testop" ]; then
        run_test-backend-ops
        exit 0
    else
        show_usage
        exit 1
    fi
elif [ $# == 2 ]; then
    qnnbackend=$2
    if [ ${qnnbackend} -gt 3 ]; then
        show_usage
        exit 1
    fi

    if [ "$1" == "run_llamacli" ]; then
        run_llamacli
        exit 0
    elif [ "$1" == "run_llamabench" ]; then
        run_llamabench
        exit 0
    fi
else
    show_usage
    exit 1
fi
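Assuming the script above is saved as build-for-windows.sh (an illustrative name) in the llama.cpp source root, it is invoked from a Cygwin bash shell like this; note that the run_* targets currently dispatch into placeholder functions that just print "not supported on Windows now":

# run from a Cygwin terminal inside the llama.cpp source tree
chmod +x build-for-windows.sh
./build-for-windows.sh build_x86            # CPU-only x86 build
./build-for-windows.sh build_x86_qnn        # x86 build with the QNN backend
./build-for-windows.sh run_llamabench 2     # backend id 2 = QNN_NPU (placeholder for now)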

(screenshot: Screenshot from 2025-03-05 21-39-38)

(screenshot: Screenshot from 2025-03-06 16-10-00)

  3. Disclaimer

This approach is not the officially recommended way to build llama.cpp on x86 Windows or WoA (Windows on ARM); it is an experimental approach. I have to suspend this experiment because I currently have no Surface Pro Copilot+ PC with a Snapdragon desktop SoC (the cheapest Surface Pro costs about USD 1100, I already bought a phone with a high-end Snapdragon 8 Gen 3 mobile SoC for related dev activity, and I already have a powerful Dell Linux workstation).
