|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +from pathlib import Path |
| 4 | +import warnings |
| 5 | +import pickle |
| 6 | +import sys |
| 7 | +import subprocess |
| 8 | +import itertools |
| 9 | +from operator import itemgetter |
| 10 | + |
# TODO: make this and the location of the repo relative to script location
# TODO: and command line args
# JavaScript file whose generated-options section is rewritten by the main
# block at the bottom of this script. The script exits at import time if the
# file is not found relative to the current working directory.
OUTPUT_FILE = Path("./resources/js/curl-to-go.js")
if not OUTPUT_FILE.is_file():
    sys.exit(
        f"{OUTPUT_FILE} doesn't exist. You should run this script from curl-to-go/"
    )

# Directory used as the working directory for every git command in this script.
# Only its existence as a directory is checked here.
PATH_TO_CURL_REPO = Path("../curl")
if not PATH_TO_CURL_REPO.is_dir():
    sys.exit(
        f"{PATH_TO_CURL_REPO} needs to be a git repo with cURL's source code. "
        "You can clone it with\n\n"
        "git clone https://github.com/curl/curl ../curl"
        # or modify the PATH_TO_CURL_REPO variable
    )


# When SHOULD_CACHE is true, the main block pickles the extracted parameters
# to PARAMS_CACHE and reloads them from there on later runs.
PARAMS_CACHE = Path("curl_params.pickle")
SHOULD_CACHE = False

# Marker substrings searched for in OUTPUT_FILE; the generated JavaScript is
# inserted after the line containing the first one, and the original lines up
# to the line containing the second one are discarded.
JS_PARAMS_START = "BEGIN GENERATED CURL OPTIONS"
JS_PARAMS_END = "END GENERATED CURL OPTIONS"

# Noteworthy commits:
#
# The first commit in cURL's git repo (from 1999)
# ae1912cb0d494b48d514d937826c9fe83ec96c4d
# has args defined in main.c, then in
# 49b79b76316248d5233d08006234933913faaa3b
# the arg definitions were moved to ./src/tool_getparam.c
#
# Originally there were only two arg "types": TRUE/FALSE which signified
# whether the option expected a value or was a boolean (respectively).
# Then in
IMPLICIT_NO_COMMIT = "5abfdc0140df0977b02506d16796f616158bfe88"
# all boolean (i.e. FALSE "type") options got an implicit --no-OPTION.
# Then TRUE/FALSE was changed to ARG_STRING/ARG_BOOL.
# Then it was realized that not all options should have a --no-OPTION
# counterpart, so a new ARG_NONE type was added for those in
# 913c3c8f5476bd7bc4d8d00509396bd4b525b8fc

# Other notes:
#
# cURL lets you not type the entire argument as long as it's unambiguous,
# for example you can type --inse instead of --insecure
#
# cURL lets you omit the space after a short option that takes an arg,
# for example you can do "-omydatahere" instead of "-o mydatahere";
# it even interprets -aqomydata the same as -a -q -o mydatahere

# A line containing OPTS_START opens the option table in the C source; the
# first following line that ends with OPTS_END closes it.
OPTS_START = "struct LongShort aliases[]= {"
OPTS_END = "};"

# Every argument-type token the parser accepts, split into those that take no
# value and those that take one. Anything else makes extract_params() raise.
BOOL_ARG_TYPES = ["FALSE", "ARG_BOOL", "ARG_NONE"]
STR_ARG_TYPES = ["TRUE", "ARG_STRING", "ARG_FILENAME"]
ARG_TYPES = BOOL_ARG_TYPES + STR_ARG_TYPES

# Mapping from the original TRUE/FALSE tokens to their later names.
# NOTE(review): not referenced by any code visible in this file - confirm
# whether it is still needed.
OLD_ARG_TYPES_TO_NEW = {"TRUE": "ARG_STRING", "FALSE": "ARG_BOOL"}
| 70 | + |
| 71 | + |
def flatten(l):
    """Return a single list holding the items of every iterable in *l*, in order."""
    return [item for group in l for item in group]
| 74 | + |
| 75 | + |
def commits_that_changed(filename):
    """Yield ``(commit_hash, author_timestamp)`` pairs for commits touching *filename*.

    Runs ``git log`` inside PATH_TO_CURL_REPO with ``--reverse``, so pairs are
    produced in the order git prints them under that flag. The timestamp is
    converted to ``int``.

    Raises:
        subprocess.CalledProcessError: if ``git log`` exits with a non-zero
            status (``check=True``).
        ValueError: if an output line does not consist of exactly two
            whitespace-separated fields, or the second is not an integer.
    """
    git_log = [
        "git",
        "log",
        "--diff-filter=d",
        "--date-order",
        "--reverse",
        "--format=%H %at",  # full commit hash and author date time stamp
        "--date=iso-strict",
        "--",
        filename,
    ]
    completed = subprocess.run(
        git_log,
        cwd=PATH_TO_CURL_REPO,
        capture_output=True,
        text=True,
        check=True,
    )
    for entry in completed.stdout.splitlines():
        commit_hash, timestamp = entry.strip().split()
        yield commit_hash, int(timestamp)
| 97 | + |
| 98 | + |
def extract_params(file_contents):
    """Parse the option table out of one revision of curl's C source.

    Scanning starts after the first line containing OPTS_START and stops at
    the first line ending with OPTS_END. Within that region, only lines that
    begin with ``{`` (after stripping) are treated as table rows of the form
    ``{short, long, type}``; anything after ``/*`` on a row is ignored.

    Returns:
        A list of ``(long_name, short_name, arg_type)`` tuples, one per
        distinct long name in first-seen order. ``short_name`` is ``None``
        when the short field is longer than one character.

    Raises:
        ValueError: if a row does not split into exactly three fields, if the
            short field is empty, or if the type is not in ARG_TYPES.

    A warning is issued when a long name reappears with a different
    ``(short, type)`` pair; the later row replaces the earlier one.
    """
    remaining = iter(file_contents.splitlines())

    # Consume lines up to and including the one that opens the table.
    for header in remaining:
        if OPTS_START in header:
            break

    seen = {}
    for raw in remaining:
        entry = raw.strip()
        if entry.endswith(OPTS_END):
            break
        if not entry.startswith("{"):
            continue

        # main.c has comments on the same line
        row = entry.split("/*")[0].strip().strip("{},")
        short, param, arg_type = row.split(",")

        short = short.strip().strip('"')
        param = param.strip().strip('"')
        arg_type = arg_type.strip()

        if not short:
            raise ValueError(f"short form of {param} is the empty string")
        # A multi-character "short" is a placeholder value, not a real option.
        value = (short if len(short) == 1 else None, arg_type)

        previous = seen.get(param)
        if previous is not None and previous != value:
            # "login-options" is one known case of this.
            warnings.warn(
                f"{param!r} repeated with different values: {previous} vs. {value} "
            )

        if arg_type not in ARG_TYPES:
            raise ValueError(f"unknown arg type: {arg_type}")

        seen[param] = value

    return [(long, short, kind) for long, (short, kind) in seen.items()]
| 137 | + |
| 138 | + |
def explicit_params_over_time():
    """Yield ``(commit_hash, timestamp, params)`` for every recorded revision.

    Walks the commits reported by commits_that_changed() for ``./src/main.c``
    and then for ``./src/tool_getparam.c``, reads the file as it was at each
    commit with ``git cat-file``, and parses it with extract_params().

    Raises:
        subprocess.CalledProcessError: if a git command fails.
        ValueError: if no parameters could be extracted from a revision.
    """
    for filename in ("./src/main.c", "./src/tool_getparam.c"):
        for commit_hash, timestamp in commits_that_changed(filename):
            blob = subprocess.run(
                ["git", "cat-file", "-p", f"{commit_hash}:{filename}"],
                cwd=PATH_TO_CURL_REPO,
                capture_output=True,
                check=True,
            ).stdout
            # Fall back to latin1 for revisions that are not valid UTF-8.
            try:
                text = blob.decode("utf-8")
            except UnicodeDecodeError:
                text = blob.decode("latin1")

            params = extract_params(text)
            if not params:
                raise ValueError(
                    f"Failed to extract params from {commit_hash}:{filename}"
                )
            yield commit_hash, timestamp, params
| 159 | + |
| 160 | + |
def consecutive_runs(seq):
    """Yield ``(first, last)`` for each maximal run of values rising by one.

    Values are grouped while ``position - value`` stays constant, which holds
    exactly when each value is one more than the value before it.
    """

    def offset(pair):
        position, value = pair
        return position - value

    for _, group in itertools.groupby(enumerate(seq), key=offset):
        members = [value for _, value in group]
        yield members[0], members[-1]
| 165 | + |
| 166 | + |
| 167 | +# TODO: de-spaghettify |
def params_over_time(params_all_time):
    """Yield lines of JavaScript declaring ``longOptions`` and ``shortOptions``.

    ``params_all_time`` is a sequence of ``(commit_hash, timestamp, params)``
    tuples in commit order, where ``params`` is a list of
    ``(long, short, arg_type)`` tuples (the shape produced by
    explicit_params_over_time()). The history of each option across those
    commits is folded into one entry per option name.

    This is a generator: nothing is computed until iteration starts. Yielded
    lines carry no trailing newline.

    Raises:
        KeyError: if ``params_all_time`` is non-empty and IMPLICIT_NO_COMMIT
            is not one of its commit hashes.
        TypeError: from the nested ``val_to_js`` if an option attribute is
            neither ``str`` nor ``bool``.
    """
    # Lookup tables between a commit hash and its position in the input.
    hashes = [h for h, *_ in params_all_time]
    to_idx = {c: i for i, c in enumerate(hashes)}
    to_hash = {i: c for i, c in enumerate(hashes)}

    # From here on only the per-commit parameter lists are needed.
    params_all_time = [p for _, _, p in params_all_time]

    # (long, normalised type) -> indices of the commits containing it
    long_args = {}
    # (short, long) -> indices of the commits containing that pairing
    short_args = {}

    # --metalink became a boolean
    # 'metalink': [('string', [(14917, 15129)]), ('bool', [(15129, None)])],

    for commit_idx, params in enumerate(params_all_time):
        after_implicit_no = commit_idx >= to_idx[IMPLICIT_NO_COMMIT]
        for long, short, arg_type in params:
            # A FALSE option only has a --no- form from IMPLICIT_NO_COMMIT on.
            if arg_type == "FALSE":
                arg_type = "ARG_BOOL" if after_implicit_no else "ARG_NONE"

            # Both value-taking spellings collapse to ARG_STRING.
            arg_type = {"TRUE": "ARG_STRING", "ARG_FILENAME": "ARG_STRING"}.get(
                arg_type, arg_type
            )

            # "ARG_BOOL" -> "bool", "ARG_STRING" -> "string", "ARG_NONE" -> "none"
            arg_type = arg_type.removeprefix("ARG_").lower()
            long_args.setdefault((long, arg_type), []).append(commit_idx)

            if short is not None:
                short_args.setdefault((short, long), []).append(commit_idx)

    # this option was removed more than once
    # ('sasl-authzid', 'ARG_STRING', None)
    # This one just had the short option changed and put back I think
    # ('http1.0', '0')
    new_long_args = {}
    new_short_args = {}
    for (long, arg_type), commits in long_args.items():
        # One (start, end) pair per unbroken run of commits. start is None if
        # the run begins at the first commit; end is the index of the first
        # commit after the run, or None if the run reaches the last commit.
        lifetimes = [
            (
                start if start > 0 else None,
                (end + 1) if ((end + 1) < len(params_all_time)) else None,
            )
            for start, end in consecutive_runs(commits)
        ]

        arg_data = {"type": arg_type}
        # one arg had a trailing space
        name = long.removeprefix("disable-").strip()
        if name != long:
            arg_data["name"] = name
        # If every run ended, record the commit following the latest run as
        # the one in which the option disappeared.
        ends = [l[1] for l in lifetimes]
        if None not in ends:
            arg_data["deleted"] = to_hash[max(ends)]

        # A long name seen with several types is assigned once per type, so
        # the (long, type) key processed last determines the final entry.
        new_long_args[long] = arg_data
        if arg_type == "bool":
            # Boolean options also get an explicit "no-" entry.
            new_long_args["no-" + long] = {**arg_data, "name": name, "expand": False}
        elif arg_type == "none":
            # Emitted as a boolean, but without a "no-" counterpart.
            new_long_args[long]["type"] = "bool"

    for (short, long), commits in short_args.items():
        # Same run/lifetime computation as for the long options above.
        lifetimes = [
            (
                start if start > 0 else None,
                (end + 1) if ((end + 1) < len(params_all_time)) else None,
            )
            for start, end in consecutive_runs(commits)
        ]

        # -N is short for --no-buffer
        if short == "N":
            long = "no-" + long

        arg_data = {"long": long}
        ends = [l[1] for l in lifetimes]
        deleted = None not in ends
        if deleted:
            arg_data["deleted"] = to_hash[max(ends)]

        # When a short letter maps to several long options, keep the entry
        # already stored unless that entry is marked as deleted.
        if short in new_short_args:
            if new_short_args[short].get("deleted"):
                new_short_args[short] = arg_data
        else:
            new_short_args[short] = arg_data

    def as_js(d, var_name):
        # Yields a tab-indented ``var <var_name> = {...}`` object literal,
        # one line per top-level key of d.
        yield f"\tvar {var_name} = {{"
        for top_key, opt_dict in d.items():
            # TODO: rough
            def quote(key):
                # Purely alphabetic keys are left bare; others use repr().
                return key if key.isalpha() else repr(key)

            def val_to_js(val):
                # Strings use their Python repr; booleans become true/false.
                if isinstance(val, str):
                    return repr(val)
                if isinstance(val, bool):
                    return str(val).lower()
                raise TypeError(f"can't convert values of type {type(val)} to JS")

            vals = [f"{quote(k)}: {val_to_js(v)}" for k, v in opt_dict.items()]

            yield f"\t\t{top_key!r}: {{{', '.join(vals)}}},"
        yield "\t}"

    yield from as_js(new_long_args, "longOptions")
    yield from as_js(new_short_args, "shortOptions")
| 273 | + |
| 274 | + |
def on_git_master():
    """Return True if the checkout at PATH_TO_CURL_REPO is on branch ``master``.

    The branch is read with ``git symbolic-ref HEAD`` and compared exactly,
    rather than by matching the human-readable text of ``git status``. That
    text is translated according to the user's locale, and a prefix match on
    "On branch master" would also accept branches such as ``master-foo``.

    Returns False when HEAD is detached or the git command fails for any other
    reason (non-zero exit status).
    """
    result = subprocess.run(
        ["git", "symbolic-ref", "--quiet", "HEAD"],
        cwd=PATH_TO_CURL_REPO,
        capture_output=True,
        text=True,
    )
    return result.returncode == 0 and result.stdout.strip() == "refs/heads/master"
| 280 | + |
| 281 | + |
if __name__ == "__main__":
    # Regenerate the option tables between the JS_PARAMS_START/JS_PARAMS_END
    # marker lines of OUTPUT_FILE from the history of the curl checkout.
    if not on_git_master():
        sys.exit("not on curl's git master")

    # cache because this takes a few seconds
    if SHOULD_CACHE and PARAMS_CACHE.is_file():
        # NOTE: unpickling executes arbitrary code; this is only acceptable
        # because the cache is a local file written by this script.
        with open(PARAMS_CACHE, "rb") as f:
            params_all_time = pickle.load(f)
    else:
        params_all_time = list(explicit_params_over_time())
        if SHOULD_CACHE:
            with open(PARAMS_CACHE, "wb") as f:
                # pickle.dump() returns None, so its result must not be
                # assigned back to params_all_time.
                pickle.dump(params_all_time, f)

    new_lines = []
    with open(OUTPUT_FILE) as f:
        # Copy everything up to and including the start marker line.
        found_start = False
        for line in f:
            new_lines.append(line)
            if JS_PARAMS_START in line:
                found_start = True
                break
        if not found_start:
            sys.exit(f"{JS_PARAMS_START!r} not found in {OUTPUT_FILE}, not modifying it")

        new_lines += [js_line + "\n" for js_line in params_over_time(params_all_time)]

        # Drop the previously generated lines, keeping the end marker line.
        found_end = False
        for line in f:
            if JS_PARAMS_END in line:
                new_lines.append(line)
                found_end = True
                break
        if not found_end:
            # Writing now would discard everything after the start marker.
            sys.exit(f"{JS_PARAMS_END!r} not found in {OUTPUT_FILE}, not modifying it")

        # Copy the remainder of the file unchanged.
        new_lines.extend(f)

    with open(OUTPUT_FILE, "w", newline="\n") as f:
        f.write("".join(new_lines))
0 commit comments