# Clone repo
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp

# Build (the main binary ends up in build/bin)
mkdir -p build
cd build
cmake ..
cmake --build . --config Release
cd ..

# Download model
export MODEL=llama-2-13b-chat.ggmlv3.q4_0.bin
if [ ! -f models/${MODEL} ]; then
  curl -L "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/${MODEL}" -o models/${MODEL}
fi

# Set prompt
PROMPT="Hello! How are you?"

# Run in interactive mode (reusing ${MODEL} and ${PROMPT} set above)
./build/bin/main -m ./models/${MODEL} \
  --color \
  --ctx_size 2048 \
  -n -1 \
  -ins -b 256 \
  --top_k 10000 \
  --temp 0.2 \
  --repeat_penalty 1.1 \
  -t 8 \
  -p "${PROMPT}"
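
# Alternative: a single non-interactive completion instead of the interactive
# session above. This is a sketch, not part of the original script: the -n 256
# token limit is an illustrative choice (a fixed budget instead of -n -1), and
# dropping -ins makes the binary print one completion for ${PROMPT} and exit.
# Uncomment to use in place of the interactive run:
#
# ./build/bin/main -m ./models/${MODEL} \
#   --ctx_size 2048 \
#   -n 256 \
#   --temp 0.2 \
#   --repeat_penalty 1.1 \
#   -t 8 \
#   -p "${PROMPT}"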