Skip to content

Commit 1d3d1ff

Browse files
committed
allow shortening arguments
1 parent 57203b1 commit 1d3d1ff

File tree

4 files changed

+883
-198
lines changed

4 files changed

+883
-198
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
.DS_Store
22
Thumbs.db
33
_gitignore
4+
5+
curl_params.pickle

extract_curl_args.py

Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
#!/usr/bin/env python3
2+
3+
from pathlib import Path
4+
import warnings
5+
import pickle
6+
import sys
7+
import subprocess
8+
import itertools
9+
from operator import itemgetter
10+
11+
# TODO: make this and the location of the repo relative to script location
12+
# TODO: and command line args
13+
# The JavaScript file whose generated option tables get rewritten in place.
# The path is relative to the current working directory, hence the hint to
# run the script from the curl-to-go/ checkout.
OUTPUT_FILE = Path("./resources/js/curl-to-go.js")
if not OUTPUT_FILE.is_file():
    sys.exit(
        f"{OUTPUT_FILE} doesn't exist. You should run this script from curl-to-go/"
    )

# A checkout of cURL's source, expected as a sibling of the working directory
# (or modify the PATH_TO_CURL_REPO variable).
PATH_TO_CURL_REPO = Path("../curl")
if not PATH_TO_CURL_REPO.is_dir():
    sys.exit(
        f"{PATH_TO_CURL_REPO} needs to be a git repo with cURL's source code. "
        "You can clone it with\n\n"
        "git clone https://github.com/curl/curl ../curl"
    )


# Optional pickle cache of the extracted history; only used when SHOULD_CACHE
# is switched on.
PARAMS_CACHE = Path("curl_params.pickle")
SHOULD_CACHE = False

# Marker substrings looked for in OUTPUT_FILE; the generated tables are
# written between the line containing the first and the line containing the
# second.
JS_PARAMS_START = "BEGIN GENERATED CURL OPTIONS"
JS_PARAMS_END = "END GENERATED CURL OPTIONS"
34+
35+
# Noteworthy commits in cURL's history:
#
# * ae1912cb0d494b48d514d937826c9fe83ec96c4d
#   The first commit in cURL's git repo (from 1999); the args are defined
#   in main.c.
# * 49b79b76316248d5233d08006234933913faaa3b
#   The arg definitions were moved to ./src/tool_getparam.c
#
# Originally there were only two arg "types", TRUE and FALSE, which signified
# whether the option expected a value (TRUE) or was a boolean (FALSE).
#
# In the commit below, all boolean (i.e. FALSE "type") options got an
# implicit --no-OPTION.
IMPLICIT_NO_COMMIT = "5abfdc0140df0977b02506d16796f616158bfe88"
# Later TRUE/FALSE was changed to ARG_STRING/ARG_BOOL.
# Then it was realized that not all options should have a --no-OPTION
# counterpart, so a new ARG_NONE type was added for those in
# 913c3c8f5476bd7bc4d8d00509396bd4b525b8fc

# Other notes:
#
# cURL lets you not type the entire argument as long as it's unambiguous,
# for example you can type --inse instead of --insecure
#
# cURL lets you omit the space after a short option that takes an arg,
# for example you can do "-omydatahere" instead of "-o mydatahere";
# it even interprets -aqomydatahere the same as -a -q -o mydatahere

# Text delimiting the option table inside the C source.
OPTS_START = "struct LongShort aliases[]= {"
OPTS_END = "};"

# Every "type" spelling that has appeared in the table, old and new.
BOOL_ARG_TYPES = ["FALSE", "ARG_BOOL", "ARG_NONE"]
STR_ARG_TYPES = ["TRUE", "ARG_STRING", "ARG_FILENAME"]
ARG_TYPES = [*BOOL_ARG_TYPES, *STR_ARG_TYPES]

OLD_ARG_TYPES_TO_NEW = {"TRUE": "ARG_STRING", "FALSE": "ARG_BOOL"}
70+
71+
72+
def flatten(l):
    """Return one list holding the items of every iterable in *l*, in order."""
    return [item for group in l for item in group]
74+
75+
76+
def commits_that_changed(filename):
    """Yield ``(commit_hash, timestamp)`` for the commits git lists for *filename*.

    ``git log`` is run inside PATH_TO_CURL_REPO with ``--reverse``, so the
    pairs arrive in the reverse of git's usual newest-first order.
    *timestamp* is the author date as an integer (the ``%at`` placeholder).
    A failing git command raises ``subprocess.CalledProcessError``.
    """
    git_log = [
        "git",
        "log",
        "--diff-filter=d",
        "--date-order",
        "--reverse",
        "--format=%H %at",  # full commit hash and author date time stamp
        "--date=iso-strict",
        "--",
        filename,
    ]
    completed = subprocess.run(
        git_log,
        cwd=PATH_TO_CURL_REPO,
        capture_output=True,
        text=True,
        check=True,
    )
    for entry in completed.stdout.splitlines():
        commit_hash, timestamp = entry.strip().split()
        yield commit_hash, int(timestamp)
97+
98+
99+
def extract_params(file_contents):
    """Parse the option table out of one version of cURL's C source.

    Scans *file_contents* for the line containing OPTS_START, then reads
    every following line that starts with ``{`` as a
    ``{"short", "long", TYPE}`` entry until a line ending in OPTS_END.

    Returns a list of ``(long, short, arg_type)`` tuples, one per distinct
    long name (a later entry replaces an earlier one). *short* is ``None``
    when the short form is longer than one character. The list is empty
    when no table is found.

    Raises ``ValueError`` for an empty short form, an unknown arg type, or
    an entry that does not split into exactly three fields. Emits a warning
    when a long name is repeated with a different value.
    """
    found = {}
    remaining = iter(file_contents.splitlines())

    # Consume lines up to and including the one that opens the table.
    for raw in remaining:
        if OPTS_START in raw:
            break

    for raw in remaining:
        entry = raw.strip()
        if entry.endswith(OPTS_END):
            break
        if not entry.startswith("{"):
            continue

        # main.c has comments on the same line
        code = entry.partition("/*")[0].strip().strip("{},")
        short, param, arg_type = (field.strip() for field in code.split(","))
        short = short.strip('"')
        param = param.strip('"')

        if not short:
            raise ValueError(f"short form of {param} is the empty string")
        if len(short) > 1:  # it's a placeholder value, not a real option
            short = None

        value = (short, arg_type)

        if param in found and found[param] != value:
            # if param not in ["login-options"]:  # I know about this one already.
            warnings.warn(
                f"{param!r} repeated with different values: {found[param]} vs. {value} "
            )

        if arg_type not in ARG_TYPES:
            raise ValueError(f"unknown arg type: {arg_type}")

        found[param] = value

    return [
        (long_name, short_name, type_name)
        for long_name, (short_name, type_name) in found.items()
    ]
137+
138+
139+
def explicit_params_over_time():
    """Yield the command line arguments that appear in the source code over time.

    For ``./src/main.c`` and then ``./src/tool_getparam.c``, every commit
    reported by ``commits_that_changed`` is read with ``git cat-file`` and
    parsed with ``extract_params``. Each item yielded is
    ``(commit_hash, timestamp, params)``.

    Raises ``ValueError`` when a version of a file yields no params, and
    ``subprocess.CalledProcessError`` when git fails.
    """
    for filename in ("./src/main.c", "./src/tool_getparam.c"):
        for commit_hash, timestamp in commits_that_changed(filename):
            blob = subprocess.run(
                ["git", "cat-file", "-p", f"{commit_hash}:{filename}"],
                cwd=PATH_TO_CURL_REPO,
                capture_output=True,
                check=True,
            ).stdout
            # Fall back to latin1 for versions of the file that are not
            # valid UTF-8.
            try:
                text = blob.decode("utf-8")
            except UnicodeDecodeError:
                text = blob.decode("latin1")

            params = extract_params(text)
            if not params:
                raise ValueError(
                    f"Failed to extract params from {commit_hash}:{filename}"
                )
            yield commit_hash, timestamp, params
159+
160+
161+
def consecutive_runs(seq):
    """Yield ``(first, last)`` for each run of consecutive values in *seq*.

    Neighbouring values belong to the same run while ``position - value``
    stays the same, i.e. while each value is one more than the previous one.
    An empty *seq* yields nothing.
    """
    in_run = False
    run_key = run_first = run_last = None
    for position, value in enumerate(seq):
        key = position - value
        if in_run and key == run_key:
            run_last = value
            continue
        if in_run:
            yield run_first, run_last
        in_run = True
        run_key = key
        run_first = run_last = value
    if in_run:
        yield run_first, run_last
165+
166+
167+
# TODO: de-spaghettify
168+
def params_over_time(params_all_time):
    """Yield the lines of the generated JavaScript option tables.

    *params_all_time* is a re-iterable sequence of
    ``(commit_hash, timestamp, params)`` items in history order, where
    *params* is a list of ``(long, short, arg_type)`` tuples as returned by
    ``extract_params``.

    Two JS object literals are produced, ``longOptions`` and
    ``shortOptions``. An option that is absent from the last commit gets a
    ``deleted`` entry holding the hash of the commit following its last
    appearance. Options of type ``bool`` also get a ``no-`` counterpart.

    Raises ``KeyError`` when the history is non-empty but does not contain
    IMPLICIT_NO_COMMIT, and ``TypeError`` when a value cannot be rendered as
    JavaScript.
    """
    hashes = [commit_hash for commit_hash, *_ in params_all_time]
    to_idx = {commit_hash: idx for idx, commit_hash in enumerate(hashes)}
    n_commits = len(hashes)

    # Looked up once instead of once per commit; an empty history is still
    # accepted because nothing needs classifying then.
    implicit_no_idx = to_idx.get(IMPLICIT_NO_COMMIT)
    if hashes and implicit_no_idx is None:
        raise KeyError(
            f"commit {IMPLICIT_NO_COMMIT} (implicit --no-OPTION) "
            "is not part of the given history"
        )

    # Spellings that are folded into ARG_STRING.
    string_aliases = {"TRUE": "ARG_STRING", "ARG_FILENAME": "ARG_STRING"}

    long_args = {}
    short_args = {}

    # --metalink became a boolean
    # 'metalink': [('string', [(14917, 15129)]), ('bool', [(15129, None)])],

    for commit_idx, (_, _, params) in enumerate(params_all_time):
        after_implicit_no = commit_idx >= implicit_no_idx
        for long, short, arg_type in params:
            if arg_type == "FALSE":
                # Old-style booleans only have a --no- form once cURL
                # introduced the implicit --no-OPTION.
                arg_type = "ARG_BOOL" if after_implicit_no else "ARG_NONE"
            arg_type = string_aliases.get(arg_type, arg_type)
            arg_type = arg_type.removeprefix("ARG_").lower()

            long_args.setdefault((long, arg_type), []).append(commit_idx)
            if short is not None:
                short_args.setdefault((short, long), []).append(commit_idx)

    def removal_commit(commit_indices):
        """Return the hash of the commit after the option's last run, or None.

        None means one of the runs reaches the final commit, i.e. the option
        still exists.
        """
        ends = [last + 1 for _, last in consecutive_runs(commit_indices)]
        if any(end >= n_commits for end in ends):
            return None
        return hashes[max(ends)]

    # this option was removed more than once
    # ('sasl-authzid', 'ARG_STRING', None)
    # This one just had the short option changed and put back I think
    # ('http1.0', '0')
    new_long_args = {}
    new_short_args = {}
    for (long, arg_type), commits in long_args.items():
        arg_data = {"type": arg_type}
        # one arg had a trailing space
        name = long.removeprefix("disable-").strip()
        if name != long:
            arg_data["name"] = name
        deleted_in = removal_commit(commits)
        if deleted_in is not None:
            arg_data["deleted"] = deleted_in

        new_long_args[long] = arg_data
        if arg_type == "bool":
            new_long_args["no-" + long] = {**arg_data, "name": name, "expand": False}
        elif arg_type == "none":
            # Emitted as a bool, just without the "no-" counterpart.
            new_long_args[long]["type"] = "bool"

    for (short, long), commits in short_args.items():
        # -N is short for --no-buffer
        if short == "N":
            long = "no-" + long

        arg_data = {"long": long}
        deleted_in = removal_commit(commits)
        if deleted_in is not None:
            arg_data["deleted"] = deleted_in

        # An earlier meaning of the letter is only replaced if that
        # earlier meaning was itself deleted.
        existing = new_short_args.get(short)
        if existing is None or existing.get("deleted"):
            new_short_args[short] = arg_data

    # TODO: rough
    def quote(key):
        """Leave purely alphabetic keys bare, quote anything else."""
        return key if key.isalpha() else repr(key)

    def val_to_js(val):
        """Render a str or bool as a JavaScript literal."""
        if isinstance(val, str):
            return repr(val)
        if isinstance(val, bool):
            return str(val).lower()
        raise TypeError(f"can't convert values of type {type(val)} to JS")

    def as_js(d, var_name):
        """Yield the lines of ``var <var_name> = {...}`` for the dict *d*."""
        yield f"\tvar {var_name} = {{"
        for top_key, opt_dict in d.items():
            vals = [f"{quote(k)}: {val_to_js(v)}" for k, v in opt_dict.items()]
            yield f"\t\t{top_key!r}: {{{', '.join(vals)}}},"
        yield "\t}"

    yield from as_js(new_long_args, "longOptions")
    yield from as_js(new_short_args, "shortOptions")
273+
274+
275+
def on_git_master():
    """Return True when the cURL checkout has the ``master`` branch checked out.

    The branch name is read with ``git symbolic-ref`` rather than scraped
    from ``git status``, whose human-readable text can be translated and
    whose "On branch master" prefix also matches names like ``master-foo``.
    Returns False for a detached HEAD, another branch, or when git fails
    (for example, when the directory is not a git repository).
    """
    result = subprocess.run(
        ["git", "symbolic-ref", "--short", "-q", "HEAD"],
        cwd=PATH_TO_CURL_REPO,
        capture_output=True,
        text=True,
    )
    return result.returncode == 0 and result.stdout.strip() == "master"
280+
281+
282+
if __name__ == "__main__":
    # Regenerate the option tables between the BEGIN/END markers of
    # OUTPUT_FILE from the history of the cURL checkout.
    if not on_git_master():
        sys.exit("not on curl's git master")

    # cache because this takes a few seconds
    if SHOULD_CACHE and PARAMS_CACHE.is_file():
        # NOTE: pickle must only be loaded from a trusted source; this file
        # is expected to be the one written by this script below.
        with open(PARAMS_CACHE, "rb") as f:
            params_all_time = pickle.load(f)
    else:
        params_all_time = list(explicit_params_over_time())
        if SHOULD_CACHE:
            with open(PARAMS_CACHE, "wb") as f:
                # pickle.dump() returns None, so its result must not be
                # assigned back to params_all_time.
                pickle.dump(params_all_time, f)

    # Build the generated lines before touching the output file so that a
    # failure here leaves the file untouched.
    generated = [l + "\n" for l in params_over_time(params_all_time)]

    with open(OUTPUT_FILE, encoding="utf-8") as f:
        old_lines = f.readlines()

    start = next(
        (i for i, line in enumerate(old_lines) if JS_PARAMS_START in line), None
    )
    if start is None:
        sys.exit(f"{OUTPUT_FILE} has no {JS_PARAMS_START!r} marker")

    # The end marker is only searched for after the start marker's line.
    end = next(
        (i for i in range(start + 1, len(old_lines)) if JS_PARAMS_END in old_lines[i]),
        None,
    )
    if end is None:
        sys.exit(f"{OUTPUT_FILE} has no {JS_PARAMS_END!r} marker after the start marker")

    # Keep everything up to and including the start marker, and from the
    # end marker onwards; replace what was in between.
    new_lines = old_lines[: start + 1] + generated + old_lines[end:]

    with open(OUTPUT_FILE, "w", newline="\n", encoding="utf-8") as f:
        f.write("".join(new_lines))

resources/js/common.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ $(function()
9292

9393
// Fill in examples
9494
$('#example1').click(function() {
95-
$('#input').val('curl canhazip.com').keyup();
95+
$('#input').val('curl icanhazip.com').keyup();
9696
});
9797
$('#example2').click(function() {
9898
$('#input').val('curl https://api.example.com/surprise \\\n -u banana:coconuts \\\n -d "sample data"').keyup();

0 commit comments

Comments
 (0)