|
| 1 | +# client.py |
| 2 | +# |
| 3 | +# Copyright 2019 Jason McBrayer |
| 4 | +# |
| 5 | +# This program is free software: you can redistribute it and/or modify |
| 6 | +# it under the terms of the GNU Affero General Public License as published by |
| 7 | +# the Free Software Foundation, either version 3 of the License, or |
| 8 | +# (at your option) any later version. |
| 9 | +# |
| 10 | +# This program is distributed in the hope that it will be useful, |
| 11 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | +# GNU Affero General Public License for more details. |
| 14 | +# |
| 15 | +# You should have received a copy of the GNU Affero General Public License |
| 16 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 17 | + |
| 18 | +import codecs |
| 19 | +import collections |
| 20 | +import fnmatch |
| 21 | +import io |
| 22 | +import mimetypes |
| 23 | +import os.path |
| 24 | +import random |
| 25 | +import shlex |
| 26 | +import shutil |
| 27 | +import socket |
| 28 | +import ssl |
| 29 | +import subprocess |
| 30 | +import sys |
| 31 | +import tempfile |
| 32 | +import time |
| 33 | +import urllib.parse |
| 34 | +from email.message import EmailMessage |
| 35 | + |
| 36 | + |
class Response:
    """Plain record describing the outcome of one request.

    Every field defaults to ``None``; whoever builds the object sets only
    the fields that apply.

    Attributes:
        content: Response body (decoded text or raw bytes).
        content_type: MIME type of the body.
        charset: Character set declared for the body.
        lang: Language declared for the body.
        url: URL associated with the response.
        status: Status code taken from the response header.
        status_meta: Extra header information attached to the status.
        prompt: Prompt text for responses that ask for input.
        num_bytes: Size of the body in bytes.
        error_message: Error text for failed requests.
    """

    # Class-level defaults are kept so that the attributes remain readable
    # on the class itself, exactly as before.
    content = None
    content_type = None
    charset = None
    lang = None
    url = None
    status = None
    status_meta = None
    prompt = None
    num_bytes = None
    error_message = None

    # Fields shown verbatim by __repr__; ``content`` is summarised instead
    # because a body can be arbitrarily large.
    _REPR_FIELDS = (
        "status",
        "status_meta",
        "url",
        "content_type",
        "charset",
        "lang",
        "num_bytes",
        "prompt",
        "error_message",
    )

    def __init__(
        self,
        content=None,
        content_type=None,
        charset=None,
        lang=None,
        url=None,
        status=None,
        status_meta=None,
        prompt=None,
        num_bytes=None,
        error_message=None,
    ):
        self.content = content
        self.content_type = content_type
        self.charset = charset
        self.lang = lang
        self.url = url
        self.status = status
        self.status_meta = status_meta
        self.prompt = prompt
        self.num_bytes = num_bytes
        self.error_message = error_message

    def __repr__(self):
        """Return a compact debugging representation listing the set fields."""
        shown = []
        for name in self._REPR_FIELDS:
            value = getattr(self, name)
            if value is not None:
                shown.append("{}={!r}".format(name, value))
        if self.content is not None:
            shown.append(
                "content=<{}, len {}>".format(
                    type(self.content).__name__, len(self.content)
                )
            )
        return "{}({})".format(type(self).__name__, ", ".join(shown))
| 72 | + |
| 73 | + |
def fetch(raw_url):
    """Request ``raw_url`` and return the outcome as a ``Response``.

    The URL is parsed with ``gemini`` as the default scheme, the request is
    sent through ``_send_request`` and the reply header is interpreted by
    its two-character status code.

    Args:
        raw_url: URL to request, as a string.

    Returns:
        A ``Response`` whose populated fields depend on the status:

        * ``1x``: ``url``, ``status`` and ``prompt`` (the header meta).
        * ``3x``: ``status`` and ``url`` (the meta, passed through
          ``urlparse``; it is not resolved against the request URL).
        * ``4x``/``5x``: ``status`` and ``error_message`` (the meta).
        * ``2x``: ``content``, ``content_type``, ``charset``, ``lang``,
          ``num_bytes``, ``url`` and ``status``.  ``content`` is decoded
          text for ``text/*`` types and raw bytes otherwise.

        ``None`` on any failure: no network location in the URL, a
        connection error, a malformed header, a missing meta value, a
        ``6x`` status, an undefined status, an unknown charset, or a body
        that cannot be read or decoded.  Failures are reported on standard
        output, except a header without a trailing newline, which is only
        passed to ``_debug``.
    """
    url = urllib.parse.urlparse(raw_url, "gemini")
    # Without a network location there is nothing to connect to.  No
    # connection exists yet, so there is nothing to close either.
    if not url.netloc:
        print("ERROR: {} parses with no netloc".format(raw_url))
        return None

    f = None
    try:
        # Do everything which touches the network for the header in one
        # block, so connection errors are reported in one place.
        try:
            _address, f = _send_request(url)
            # Read a bounded amount so a hostile server cannot feed an
            # endless header line; the length is validated further down.
            header = f.readline(1027).decode("UTF-8")
        except socket.gaierror:
            print("ERROR: DNS error!")
            return None
        except ConnectionRefusedError:
            print("ERROR: Connection refused!")
            return None
        except ConnectionResetError:
            print("ERROR: Connection reset!")
            return None
        except (TimeoutError, socket.timeout):
            print(
                "ERROR: Connection timed out!\n"
                "Slow internet connection? Use 'set timeout' to be more patient."
            )
            return None
        except Exception as err:
            print("ERROR: " + str(err))
            return None

        # A complete header line always ends with a newline.
        if not header or header[-1] != "\n":
            _debug("ERROR: Received invalid header from server!")
            return None
        header = header.strip()
        _debug("Response header: %s." % header)

        # Validate header: "<two-digit status> <meta>"
        header_split = header.split(maxsplit=1)
        if not header_split:
            print("ERROR: Received invalid header from server!")
            return None
        status = header_split[0]
        # Splitting on whitespace never yields an empty field, so meta is
        # either a non-empty string or absent (None).
        meta = header_split[1] if len(header_split) > 1 else None
        if len(header) > 1024 or len(status) != 2 or not status.isnumeric():
            print("ERROR: Received invalid header from server!")
            return None

        # Handle headers. Not all headers are handled yet.
        # Input
        if status.startswith("1"):
            if meta is None:
                print("ERROR: Input status requires a meta value in header!")
                return None
            return Response(url=url.geturl(), status=status, prompt=meta)
        # Redirects
        if status.startswith("3"):
            if meta is None:
                print("ERROR: Redirect status requires a meta value in header!")
                return None
            return Response(
                url=urllib.parse.urlparse(meta).geturl(),
                status=status,
            )
        # Errors
        if status.startswith(("4", "5")):
            if meta is None:
                print("ERROR: Error status requires a meta value in header!")
                return None
            return Response(status=status, error_message=meta)
        # Client cert
        if status.startswith("6"):
            print("ERROR: The requested resource requires client-certificate")
            return None
        # Invalid status
        if not status.startswith("2"):
            print("ERROR: Server returned undefined status code %s!" % status)
            return None

        # Handle success: meta carries the MIME type and its parameters.
        if meta is None:
            print("ERROR: Success status requires a meta value in header!")
            return None
        # Let the email package parse the Content-Type style value.
        msg = EmailMessage()
        msg["content-type"] = meta
        mime = msg.get_content_type()
        mime_options = msg["Content-Type"].params
        default_charset = "utf-8"
        charset = mime_options.get("charset")
        if charset is not None:
            try:
                codecs.lookup(charset)
            except LookupError:
                print("Header declared unknown encoding %s" % charset)
                return None
        lang = mime_options.get("lang")

        # Read the response body over the network
        try:
            body = f.read()
        except Exception:
            print("Error reading response over network!")
            return None

        if mime.startswith("text/"):
            effective_charset = charset or default_charset
            try:
                content = codecs.decode(body, effective_charset)
            except Exception:
                # The charset comes from the server, so any codec failure
                # is possible here; report it instead of failing silently.
                print(
                    "ERROR: problem decoding content with %s charset"
                    % effective_charset
                )
                return None
        else:
            content = body

        return Response(
            content=content,
            content_type=mime,
            charset=charset,
            lang=lang,
            num_bytes=len(body),
            url=url.geturl(),
            status=status,
        )
    finally:
        # Release the reply file on every path, success or failure.
        if f is not None:
            f.close()
| 214 | + |
| 215 | + |
def _send_request(url, timeout=15.0):
    """Open a TLS connection for ``url`` and send the request line.

    The candidate addresses come from ``_get_addresses`` and are tried in
    order until one connects.

    Args:
        url: Parsed URL (result of ``urllib.parse.urlparse``); its
            ``hostname``, ``port`` (default 1965) and ``geturl()`` are used.
        timeout: Socket timeout in seconds applied to each connection.

    Returns:
        A tuple ``(address, reply)`` where ``address`` is the address-info
        entry that connected and ``reply`` is a binary file object for
        reading the server's reply.  Closing ``reply`` closes the
        connection.

    Raises:
        OSError: If no address could be connected to (the error from the
            last attempt is re-raised), or if sending the request fails.
    """
    port = url.port if url.port is not None else 1965
    addresses = _get_addresses(url.hostname, port)

    # The TLS configuration does not depend on the address, so build the
    # context once instead of once per connection attempt.
    context = ssl.SSLContext(protocol=ssl.PROTOCOL_TLS_CLIENT)
    # NOTE(security): certificate and hostname verification are switched
    # off here, so the server's identity is not checked by this function.
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE
    # Impose minimum TLS version
    if hasattr(ssl, "TLSVersion") and hasattr(context, "minimum_version"):
        context.minimum_version = ssl.TLSVersion.TLSv1_2
    else:
        # Older interpreters: disable the legacy protocols explicitly.
        # This must be an in-place update; a bare ``options | FLAG``
        # expression would leave the context unchanged.
        context.options |= (
            ssl.OP_NO_SSLv2
            | ssl.OP_NO_SSLv3
            | ssl.OP_NO_TLSv1
            | ssl.OP_NO_TLSv1_1
        )
    context.set_ciphers(
        "AES256-GCM-SHA384:AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:!aNULL:!eNULL:!EXPORT:!DES:!RC4:!3DES:!MD5:!PSK"
    )

    # Connect to remote host by any address possible
    last_error = None
    for address in addresses:
        _debug("Connecting to: " + str(address[4]))
        s = socket.socket(address[0], address[1])
        try:
            s.settimeout(timeout)
            s = context.wrap_socket(s, server_hostname=url.hostname)
            s.connect(address[4])
        except OSError as e:
            # Do not leak the socket of a failed attempt.
            s.close()
            last_error = e
        except BaseException:
            s.close()
            raise
        else:
            break
    else:
        # If we couldn't connect to *any* of the addresses, just
        # bubble up the exception from the last attempt and deny
        # knowledge of earlier failures.
        if last_error is None:
            raise OSError("No addresses found for {}".format(url.hostname))
        raise last_error

    try:
        _debug("Established {} connection.".format(s.version()))
        _debug("Cipher is: {}.".format(s.cipher()))

        # Send request and wrap response in a file descriptor
        _debug("Sending %s<CRLF>" % url.geturl())
        s.sendall((url.geturl() + "\r\n").encode("UTF-8"))
        reply = s.makefile(mode="rb")
    except BaseException:
        s.close()
        raise
    # Hand ownership of the connection to the file object: once close()
    # has been called on the socket, closing the last file made from it
    # really closes the connection (see socket.makefile documentation).
    s.close()
    return address, reply
| 260 | + |
| 261 | + |
def _get_addresses(host, port):
    """Resolve ``host`` and ``port`` to stream-socket address entries.

    Returns the ``socket.getaddrinfo`` results with all IPv6 entries moved
    to the front; the relative order within each group is preserved.
    """
    if ":" in host:
        # A colon means this is likely a literal IPv6 address, so only
        # IPv6 results may be requested or getaddrinfo will complain.
        family = socket.AF_INET6
    else:
        # 0 accepts either family; fall back to IPv4 only when the
        # platform has no IPv6 support.
        family = 0 if socket.has_ipv6 else socket.AF_INET

    resolved = socket.getaddrinfo(
        host, port, family=family, type=socket.SOCK_STREAM
    )

    # Partition instead of sorting: IPv6 entries first, then the rest.
    ipv6_entries = [entry for entry in resolved if entry[0] == socket.AF_INET6]
    other_entries = [entry for entry in resolved if entry[0] != socket.AF_INET6]
    return ipv6_entries + other_entries
| 280 | + |
| 281 | + |
def _parse_url(url):
    """Work around issues with Python's urllib.parse.

    Placeholder: nothing is implemented yet, so ``url`` is ignored and
    ``None`` is returned.
    """
    return None
| 285 | + |
| 286 | + |
def _debug(message):
    """Debug-output hook; currently a no-op that discards ``message``."""
    return None
0 commit comments