Skip to content

Commit

Permalink
Add --cli flag and show unified man page on --help
Browse files Browse the repository at this point in the history
Saying `llamafile --help` will now display a unified man page, on all
operating systems using the system pagination program (less or more).
The llamafile and llamafile-server manuals have been unified into one
coherent story. Extensive examples have been added. This fixes the
recent breakage of --help caused by b86dcb7.

In order to avoid breaking any upstream behavior, a new --cli flag is
added which lets you explicitly put llamafile in command line mode,
which might be useful for anyone who wants ex nihilo token sampling.
  • Loading branch information
jart committed Jan 2, 2024
1 parent 1dcf274 commit ff86733
Show file tree
Hide file tree
Showing 12 changed files with 1,068 additions and 258 deletions.
2 changes: 0 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ o/$(MODE)/: o/$(MODE)/llama.cpp o/$(MODE)/llamafile
.PHONY: install
install: llamafile/zipalign.1 \
llama.cpp/main/main.1 \
llama.cpp/server/server.1 \
llama.cpp/quantize/quantize.1 \
llama.cpp/perplexity/perplexity.1 \
llama.cpp/llava/llava-quantize.1 \
Expand All @@ -42,7 +41,6 @@ install: llamafile/zipalign.1 \
mkdir -p $(PREFIX)/share/man/man1
$(INSTALL) -m 0644 llamafile/zipalign.1 $(PREFIX)/share/man/man1/zipalign.1
$(INSTALL) -m 0644 llama.cpp/main/main.1 $(PREFIX)/share/man/man1/llamafile.1
$(INSTALL) -m 0644 llama.cpp/server/server.1 $(PREFIX)/share/man/man1/llamafile-server.1
$(INSTALL) -m 0644 llama.cpp/quantize/quantize.1 $(PREFIX)/share/man/man1/llamafile-quantize.1
$(INSTALL) -m 0644 llama.cpp/perplexity/perplexity.1 $(PREFIX)/share/man/man1/llamafile-perplexity.1
$(INSTALL) -m 0644 llama.cpp/llava/llava-quantize.1 $(PREFIX)/share/man/man1/llava-quantize.1
Expand Down
13 changes: 6 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -275,9 +275,8 @@ weights:
```sh
llamafile \
-m wizardcoder-python-13b-v1.0.Q8_0.gguf \
--temp 0 -e \
-r '```\n' \
-p '```c\nvoid *memcpy(char *dst, const char *src, size_t size) {\n'
--temp 0 -r '}\n' -r '```\n' \
-e -p '```c\nvoid *memcpy(void *dst, const void *src, size_t size) {\n'
```

Here's a similar example that instead utilizes Mistral-7B-Instruct
Expand Down Expand Up @@ -314,7 +313,7 @@ Here's an example of how you can use llamafile to summarize HTML URLs:
sed 's/ */ /g'
echo '[/INST]'
) | llamafile \
-m mistral-7b-instruct-v0.1.Q4_K_M.gguf \
-m mistral-7b-instruct-v0.2.Q5_K_M.gguf \
-f /dev/stdin \
-c 0 \
--temp 0 \
Expand All @@ -329,7 +328,7 @@ llamafile --temp 0 \
--image ~/Pictures/lemurs.jpg \
-m llava-v1.5-7b-Q4_K.gguf \
--mmproj llava-v1.5-7b-mmproj-Q4_0.gguf \
-p $'### User: What do you see?\n### Assistant: ' \
-e -p '### User: What do you see?\n### Assistant: ' \
--silent-prompt 2>/dev/null
```

Expand All @@ -341,11 +340,11 @@ wanted to write a script to rename all your image files, you could say:

```sh
llamafile --temp 0 \
--image ~/Pictures/lemurs.jpg \
--image lemurs.jpg \
-m llava-v1.5-7b-Q4_K.gguf \
--mmproj llava-v1.5-7b-mmproj-Q4_0.gguf \
--grammar 'root ::= [a-z]+ (" " [a-z]+)+' \
-p $'### User: What do you see?\n### Assistant: ' \
-e -p '### User: What do you see?\n### Assistant: ' \
--silent-prompt 2>/dev/null |
sed -e's/ /_/g' -e's/$/.jpg/'
a_baby_monkey_on_the_back_of_a_mother.jpg
Expand Down
4 changes: 3 additions & 1 deletion llama.cpp/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
std::replace(arg.begin(), arg.end(), '_', '-');
}

if (arg == "-s" || arg == "--seed") {
if (arg == "--cli") {
// do nothing
} else if (arg == "-s" || arg == "--seed") {
if (++i >= argc) {
invalid_param = true;
break;
Expand Down
5 changes: 5 additions & 0 deletions llama.cpp/main/BUILD.mk
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@ o/$(MODE)/llama.cpp/main/main: \
o/$(MODE)/llama.cpp/server/server.a \
o/$(MODE)/llama.cpp/llava/llava.a \
o/$(MODE)/llama.cpp/llama.cpp.a \
o/$(MODE)/llama.cpp/main/main.1.asc.zip.o \
$(LLAMA_CPP_SERVER_ASSETS:%=o/$(MODE)/%.zip.o)

llama.cpp/main/main.1.asc: llama.cpp/main/main.1
-man $< >$@.tmp && mv -f $@.tmp $@
@rm -f $@.tmp

.PHONY: o/$(MODE)/llama.cpp/main
o/$(MODE)/llama.cpp/main: \
o/$(MODE)/llama.cpp/main/main
Loading

0 comments on commit ff86733

Please sign in to comment.