|
| 1 | +#! /usr/bin/env python |
| 2 | + |
| 3 | +""" |
| 4 | +This script is a rudimentary end-to-end test of the content scanner. It starts the |
| 5 | +content scanner as a subprocess, using the hard-coded config.yaml. The scanner is |
| 6 | +configured with matrix.org as its upstream homeserver, and to use a dummy scanning script |
| 7 | +which just calls `sleep 1`. |
| 8 | +
|
| 9 | +Next, we concurrently request Matrix Avatar URLs taken from the public |
| 10 | +#synapse-dev:matrix.org room. (The URLs are hard-coded in this file. It's ugly, but good |
| 11 | +enough for now.) |
| 12 | +
|
| 13 | +We wait for the content scanner to finish responding to each request, reading the |
| 14 | +response bodies from the scanner. We print how long (wall clock) it took to do so, |
| 15 | +and close the content scanner subprocess. |
| 16 | +
|
| 17 | +Invoke this script with `-v` to print out content scanner logs. |
| 18 | +""" |
| 19 | + |
| 20 | +import asyncio |
| 21 | +import collections |
| 22 | +import os.path |
| 23 | +import subprocess |
| 24 | +import sys |
| 25 | +import time |
| 26 | +import timeit |
| 27 | +import traceback |
| 28 | + |
| 29 | +import aiohttp |
| 30 | + |
| 31 | +timer = timeit.default_timer |
| 32 | + |
| 33 | +AVATAR_URLS_TAKEN_FROM_SYNAPSE_DEV = [ |
| 34 | + "http://127.0.0.1:8080/ipfs/QmfS3zCyhM4KgvYWH1HrD1Rnumns7fyTzcSHjk5fsWe5ZH?filename=IMG_20230222_191003_e_1677506180005.jpg", |
| 35 | + "mxc://1312.media/SQdCZTnJfLkBAxgQMPkVgsPY", |
| 36 | + "mxc://abolivier.bzh/zPatuAFfwaXVxsJudPWkFcWF", |
| 37 | + "mxc://aguiarvieira.pt/74665ee95b29e2a217b88911cfc664a1ccbb7e141703097801866477568", |
| 38 | + "mxc://amorgan.xyz/JHlaCvKzIPrlcnYWTFoOqsmH", |
| 39 | + "mxc://asra.gr/4f06832b1418d4c5ba91cae68135592754841080", |
| 40 | + "mxc://automattic.com/cf00594221369ad4498eb3b73032969c7be0fa3b", |
| 41 | + "mxc://b3.hk/kKAHEhEOFMyXHQCcSFuQOQza", |
| 42 | + "mxc://beeper.com/18850ea089e0ecc16d7db55527925b43ad63295c", |
| 43 | + "mxc://beeper.com/c2ef30e46e6f99cd913f2b632573033c60a74524", |
| 44 | + "mxc://bolha.chat/BevcFWoBVCMGMGqYQNhVddfu", |
| 45 | + "mxc://bolha.chat/ClRsLphUvHmWHWOFjKLwiknN", |
| 46 | + "mxc://bonifacelabs.ca/WjbmLXYLDRPxUzorCdExENVZ", |
| 47 | + "mxc://bramen.com.co/oTFgSIkJdDTBIcuvtWTukatz", |
| 48 | + "mxc://brodi.me/PPjyGXrcCqcwRrKpYoIgLvgw", |
| 49 | + "mxc://cadair.com/LdiPRXiYOVpdWvURyocZmvUo", |
| 50 | + "mxc://chat.decatrion.com/MXOQjcRSnVSqOALFTDlgIKnq", |
| 51 | + "mxc://chat.interru.io/UJdEhRreNufARVwpCAGWnHTx", |
| 52 | + "mxc://chat.mistli.net/MIlfZzUpEUelhCLXVFPMacZO", |
| 53 | + "mxc://chat.pyro.monster/bgZxviIdWbBYWInhwZozaryA", |
| 54 | + "mxc://chat.upi.li/rYupYBDqEXxkiQGEhPOiNUGs", |
| 55 | + "mxc://cody.to/hXfwsZbCLswNYgvRDqIQZOnS", |
| 56 | + "mxc://connecteu.rs/8c81538fc306d556bbbce15230b12c68ee7395f8", |
| 57 | + "mxc://cyberia.club/ObtWErjecvRjoCxbEWzHSiXM", |
| 58 | + "mxc://element.io/050bd1fa6777a004eb8ffd6c31028998331a91aa", |
| 59 | + "mxc://element.io/0750b4015ab58d23d704d3a828a1173a175cf95f", |
| 60 | + "mxc://element.io/1fec45ef987253db2728112927562567f8dd9d5e", |
| 61 | + "mxc://element.io/42eff27432ec038e933337dabcdfe3d230b3c68d", |
| 62 | + "mxc://element.io/47465a9ec77dd489e49b6748bc53c4f0122f06d7", |
| 63 | + "mxc://element.io/6130836e26b462a6fe63d4e080dd9d2037490f2b", |
| 64 | + "mxc://element.io/658198ce7f58872cc8fb68862f1eabdc5d847fbc", |
| 65 | + "mxc://element.io/a3f0d8b0868a7bf4e7449141167747a4699109ff", |
| 66 | + "mxc://element.io/bd48d4466c7e21b2ce00836631c06360206c29a0", |
| 67 | + "mxc://element.io/f03df00167d5f7ad5b5eac5375f32146cc2c3f51", |
| 68 | + "mxc://envs.net/89be88bd94378aef18b7f01e6a14d2228cfbb9fa", |
| 69 | + "mxc://envs.net/de405527b5c8dca188d6d8c7f3731e861a9b17ec", |
| 70 | + "mxc://ergaster.org/nmVViTqFqKGGxSHHcwevqnig", |
| 71 | + "mxc://ether.ai/JKGvwPJrfnWiWEIeVGLtJaSl", |
| 72 | + "mxc://fabcity.hamburg/QdttdrpZgTNKcJJWauixXEvQ", |
| 73 | + "mxc://fachschaften.org/c8faf7765794be1b24b3117925ac2464a204fc961726279478688088064", |
| 74 | + "mxc://gatto.club/qEJyuPBKpZITccTfIriEebdK", |
| 75 | + "mxc://gruenhage.xyz/3ecdecdab75225c0a14c7c804061d86962ee1550", |
| 76 | + "mxc://hackerlab.in/vjENMlrncPUGDmbyMZhWJzkG", |
| 77 | + "mxc://hackliberty.org/LeTsthiOdqoNnjOjqWjxWMAI", |
| 78 | + "mxc://half-shot.uk/81696e31e533651fb9e44ce351b4201151042acd", |
| 79 | + "mxc://jacksonchen666.com/pQoQssnTIGKOYHpcWUmYpdsQ", |
| 80 | + "mxc://jameskitt616.one/pBZDFcMKCjVjkrTMgMykKpTi", |
| 81 | + "mxc://jboi.nl/dvVWQixQMJyIQoaLFqFTTpsE", |
| 82 | + "mxc://jki.re/NBtxUkzjXpmdsGychrevxsaB", |
| 83 | + "mxc://lant.uk/MVZeSTcVlpNiDToBuKgyQfIK", |
| 84 | + "mxc://librepush.net/WbEnGmxZGKJyHqbojduVeatQ", |
| 85 | + "mxc://littlevortex.net/jSNRNEyKLRnzYEpsODAUznIZ", |
| 86 | + "mxc://luebke.io/imaijIHMncPjQqYRLtByZRzX", |
| 87 | + "mxc://matrix.0x45.moe/PwcDRntlwelLMuofemYarmqx", |
| 88 | + "mxc://matrix.atommac.com/cAycTPLQEkgtZSlZlRlZXoTx", |
| 89 | + "mxc://matrix.clandestine.network/JpKsGDMkNnSkfQqUdFuoBkFy", |
| 90 | + "mxc://matrix.eclabs.de/KyXZzZTeJyhQDBkqGBcKWyBp", |
| 91 | + "mxc://matrix.f5.htw-berlin.de/LosKszHTJgwslbvrTvNWanwE", |
| 92 | + "mxc://matrix.kevwe.se/PXHQcmOahOjAJoTouFBmevfj", |
| 93 | + "mxc://matrix.m0dex.eu/c2qHa8jqd86MdKplo1VQamYOhkMxkGEl", |
| 94 | + "mxc://matrix.org/AokEDpMKDROUmGwuoErhRIxv", |
| 95 | + "mxc://matrix.org/BORiLtSOEUnZiwCcaJftvxxm", |
| 96 | + "mxc://matrix.org/BugjUgdADNUndQASgkYDHogL", |
| 97 | + "mxc://matrix.org/CLtgiPGknzEpKDiyOrUedmEc", |
| 98 | + "mxc://matrix.org/DIGiJjzKkVsWwpppAcrGRwzB", |
| 99 | + "mxc://matrix.org/DrLDzhkVYvGjfCiUBLkrYLhs", |
| 100 | + "mxc://matrix.org/EbNOzLZJdNszNDDfDrPFvTTx", |
| 101 | + "mxc://matrix.org/FEzUmMhxMsqtfXKyYQFDROgO", |
| 102 | + "mxc://matrix.org/FVaBPAAuzqpBstuOfxDhDuiw", |
| 103 | + "mxc://matrix.org/FwXVuHOTPCJOZwjuunyMoDvw", |
| 104 | + "mxc://matrix.org/GBWoKBFhozIJcuuXzgAmESMh", |
| 105 | + "mxc://matrix.org/GadiqrOaESCBOpqEspzaFHZZ", |
| 106 | + "mxc://matrix.org/GbfNYPPXYfpYDGCPnxEOZACq", |
| 107 | + "mxc://matrix.org/HcOKfHoyUseJyNvJCZbySygK", |
| 108 | + "mxc://matrix.org/HjVgrKzDUXKrzYMDvtglFdvy", |
| 109 | + "mxc://matrix.org/IssHdyiXMcSnRCxCzqoaocGL", |
| 110 | + "mxc://matrix.org/JEPcTsDZpImzoyVdKHfeiUlK", |
| 111 | + "mxc://matrix.org/JQXLHcWNbcbQBMEWebxQPiPT", |
| 112 | + "mxc://matrix.org/JUFinhjLVhQhAmzsSpSaPFiT", |
| 113 | + "mxc://matrix.org/JUssqTzHorMXUbeaulQUNjTm", |
| 114 | + "mxc://matrix.org/KfkLMomWWjVZMbgVCKisfFPy", |
| 115 | + "mxc://matrix.org/LWCDUbJGEqfXWbuACLYPzpMM", |
| 116 | + "mxc://matrix.org/LfpqILSYnaIQDnCqGgrryaVA", |
| 117 | + "mxc://matrix.org/LlsgPelTpiYvvEgjbqKzefbr", |
| 118 | + "mxc://matrix.org/MKYSaqghosWAaMkfOTGqAXWu", |
| 119 | + "mxc://matrix.org/MSSWISKFrXqYAWwVZpgQzKNc", |
| 120 | + "mxc://matrix.org/MhFPyrortOJyjvIArZYRJNpd", |
| 121 | + "mxc://matrix.org/MohmbgPyrsnuKIYJivBLhnaJ", |
| 122 | + "mxc://matrix.org/MygYRbllJEcOXaGOySOEYMJc", |
| 123 | + "mxc://matrix.org/NZGChxcCXbBvgkCNZTLXlpux", |
| 124 | + "mxc://matrix.org/OVXDqAESXvavwJINbuwBeIHy", |
| 125 | + "mxc://matrix.org/PQWXmVjsGPqEgItiYEISwDzI", |
| 126 | + "mxc://matrix.org/QqFWSwNSKvlljlNZKBGrqCKR", |
| 127 | + "mxc://matrix.org/QsaeAloXAKVPsiczXtIBJzrZ", |
| 128 | + "mxc://matrix.org/RMMTwRenYWLPdRwIHlwuGCLG", |
| 129 | + "mxc://matrix.org/RnAJViaJiNHcGtTZgbRWXqlB", |
| 130 | + "mxc://matrix.org/SUpOMAcbPcYBaUnDikHYJOjh", |
| 131 | + "mxc://matrix.org/TGopDZiMVyhwhQBuEbUeFOKt", |
| 132 | + "mxc://matrix.org/TLEyVAuatPchpWniJrgmjUcU", |
| 133 | + "mxc://matrix.org/TlumUuzCcCGHSUMXNJmAFLML", |
| 134 | + "mxc://matrix.org/TpxNfvaFAAoZWdhwoYBHQezB", |
| 135 | + "mxc://matrix.org/VpjGllthGpjTPkvbJgOdyxkF", |
| 136 | + "mxc://matrix.org/WWvqnsZlhzWvPylUjdfhmrOV", |
| 137 | + "mxc://matrix.org/XBkKJIaWeXdfoYwMZsQWKjzj", |
| 138 | + "mxc://matrix.org/XmiRUvkkKjmTseRYrmBlvGNw", |
| 139 | + "mxc://matrix.org/XnDebYmBmnBBNeyBiUKltVlh", |
| 140 | + "mxc://matrix.org/XxylKIkLFThmHZjBMvCmipRT", |
| 141 | + "mxc://matrix.org/YtCeQeNxqnKsLvIcnwKIMlkV", |
| 142 | + "mxc://matrix.org/ZJIdWuBIRhObjOHVnoWfBUkq", |
| 143 | + "mxc://matrix.org/ZafPzsxMJtLaSaJXloBEKiws", |
| 144 | + "mxc://matrix.org/bCawIGTEGxaXxDIxIqteAhVU", |
| 145 | + "mxc://matrix.org/bDayqThxTIcGNcskzIADknRv", |
| 146 | + "mxc://matrix.org/bEVwopEQDMNjfzbiPKYgZXWU", |
| 147 | + "mxc://matrix.org/bHNoSLOERjdQrUodZUIFYAQl", |
| 148 | + "mxc://matrix.org/bSYOldVxWNFeulNUshiOSvlM", |
| 149 | + "mxc://matrix.org/bcBGBuKkVBITyyfjLHLVrPKj", |
| 150 | + "mxc://matrix.org/bipAEyCRqzXokNjHcDwbWXkO#auto", |
| 151 | + "mxc://matrix.org/cKhTXJzIZZjHfNRbNJHjxSxw", |
| 152 | + "mxc://matrix.org/cZEhMcslgpUJdTNMIuQSEukn", |
| 153 | + "mxc://matrix.org/djdngehyFuFlApXWpYotALoK", |
| 154 | + "mxc://matrix.org/eeSkBZDfQavoKeXjWhUGOCrI", |
| 155 | + "mxc://matrix.org/fJYvrULeLqUSuOFFhvAuPbVB", |
| 156 | + "mxc://matrix.org/gJNPpakWLvKGUYteErJnbqRw", |
| 157 | + "mxc://matrix.org/iNUefSlAXjkdNzXyVaYjiiTK", |
| 158 | + "mxc://matrix.org/jRqrnjimPBqTSSdJlOupMqSx", |
| 159 | + "mxc://matrix.org/jVqDFNtFnwfXedjMKZLgtnsY", |
| 160 | + "mxc://matrix.org/kOewGAJWihuVeafiSwgLeiJa", |
| 161 | + "mxc://matrix.org/lyWZOWsBRhCcxKRgVUbDdtux", |
| 162 | + "mxc://matrix.org/mhuskbkCQPvAXCCoZMMcUltg", |
| 163 | + "mxc://matrix.org/nKpRPUortweIAocZOKakSmle", |
| 164 | + "mxc://matrix.org/nwWAiyZHhWuATgUqhXSUgyOq", |
| 165 | + "mxc://matrix.org/oUxxDyzQOHdVDMxgwFzyCWEe", |
| 166 | + "mxc://matrix.org/oqUhSAlhShWRUoOypviZYzCl", |
| 167 | + "mxc://matrix.org/owHbMxnvtZQhORPMIjEMhHJC", |
| 168 | + "mxc://matrix.org/paFLquBfsoSUMExpgOePaYGn", |
| 169 | + "mxc://matrix.org/pcyhRmMTlUPZNUWLBrrBYOUF", |
| 170 | + "mxc://matrix.org/qCJQIqJLUntAlQjvjVqqkISE", |
| 171 | + "mxc://matrix.org/qyoRKkkSwwqoaseeRDCWGmgL", |
| 172 | + "mxc://matrix.org/rAtNyCxKhZKYjIpCMTMVIyZb", |
| 173 | + "mxc://matrix.org/stXVscjfSSwEGcpNUOaTOmuw", |
| 174 | + "mxc://matrix.org/tmemWZxwaiSRLneppvjscbSv", |
| 175 | + "mxc://matrix.org/uFsobEhOojpEXTORyXJznvMf", |
| 176 | + "mxc://matrix.org/wEydarIdYNQoHHnOpfYGQAkZ", |
| 177 | + "mxc://matrix.org/xppypIFIDuFCqmdJHGjTuRsk", |
| 178 | + "mxc://matrix.org/yAEcXFYGUHsLALuVuHtqgsPk", |
| 179 | + "mxc://matrix.org/yCdHqfZAMYzGsSeCYODLGNJQ", |
| 180 | + "mxc://matrix.org/zRHixRxWSlriuAyCEqxKcsUN", |
| 181 | + "mxc://matrix.tarina.org/yQAGQhgyZtbJDzoCxcUoNlte", |
| 182 | + "mxc://maunium.net/jdlSfvudiMSmcRrleeiYjjFO", |
| 183 | + "mxc://mccarty.io/uCPFlUrLVWMrjuZVDnlIzIoI", |
| 184 | + "mxc://medienhaus.dev/RSWiRFctJPQRAfLGfUTIWqCo", |
| 185 | + "mxc://moritzdietz.com/oPOkWTlBWdTFbwXuGZNxbpAU", |
| 186 | + "mxc://mozilla.org/66d994693725ea09256c22ac43b0e74e79f1abb4", |
| 187 | + "mxc://mpl.mpg.de/lxwOKWWbfwlGxAMKhNIfiJRR", |
| 188 | + "mxc://msg-net.de/uqthdSIKEsmLlAnrguhOBSRg", |
| 189 | + "mxc://mx.anismk.de/hjKAFiGKMasHOCdEVPsmoozA", |
| 190 | + "mxc://mx.grupotd.nat.cu/ZfxNoISumlPZZEHqRNbhewQW", |
| 191 | + "mxc://neko.dev/wLFwLqbnyvrstuomVXdKMqyJ", |
| 192 | + "mxc://nevarro.space/WmGsIGgESPTtJFskYIXdRlVM", |
| 193 | + "mxc://obermui.de/pCkwyNUtzdnaImzuqbsaJCgV", |
| 194 | + "mxc://perthchat.org/sNAywRrlPKygmkoxpfxSTrFz", |
| 195 | + "mxc://pixelplanet.fun/xfxdQZvpLePdlNcRIjoFovPE", |
| 196 | + "mxc://pixie.town/fq3MchyYAMzpCkfxbqr9WffR", |
| 197 | + "mxc://pixie.town/qBpNzYpOknBxnSdcbFWrbqWT", |
| 198 | + "mxc://raim.ist/oInPkqchozNTmIOeUXlCsFbp", |
| 199 | + "mxc://riot.ovh/PJxWnOsjdnIpkByXMFJVGZgE", |
| 200 | + "mxc://rs485.network/XpMPNjUVJmwwVQyaVtkAjpfl", |
| 201 | + "mxc://scamdemic.wtf/WFPdCxatgVIQcYOqkWDKVsXP", |
| 202 | + "mxc://seymour.family/ZlzrDJSjRnQYuWJGvhdCkyiS", |
| 203 | + "mxc://shiina.family/zxIxLfIyoXTeclPZznmIdRli", |
| 204 | + "mxc://simonatherley.com/nYEzJcoThHfARGPSkHXRGapn", |
| 205 | + "mxc://skyforge.at/RExFPAnBOsbCqFZIFHAESyKQ", |
| 206 | + "mxc://stratum0.org/FKcEkoEcEutsdRUaPjQitDwo", |
| 207 | + "mxc://sw1v.org/rARZrbDMGnNQOKKWZtCVxusq", |
| 208 | + "mxc://t2l.io/fYhaPLjAZLwEYqaSGKwRpQgk", |
| 209 | + "mxc://that.host/QbAhNvUApAEpvCKNWtIZwjCO", |
| 210 | + "mxc://the-apothecary.club/HScGQAQKwuQbbdNkLYoPpsNb", |
| 211 | + "mxc://tout.im/VQpPnZfufsMWerGlxkupbtYo", |
| 212 | + "mxc://uhoreg.ca/JbcxMQHvPoPUoRkwQRdmwXKm", |
| 213 | + "mxc://veganism.social/dDVjvEJugTUfWfiavHKhvCxi", |
| 214 | + "mxc://wi11.co.uk/DztCMbxBfOUrmklICETzYOEJ", |
| 215 | + "mxc://yaal.coop/BviDGOwocxQQNndowuZmhxGr", |
| 216 | +] |
| 217 | + |
| 218 | + |
async def request_media(
    session: "aiohttp.ClientSession",
    media_url: str,
    *,
    base_url: str = "http://localhost:8080",
) -> int:
    """Download one piece of media through the content scanner.

    The ``mxc://`` prefix (if present) is stripped from ``media_url`` and the
    remainder is appended to the scanner's ``media_proxy`` download endpoint.
    The response body is read in full so that the measured time includes the
    complete transfer.

    No explicit timeout is passed to the request; whatever timeout the
    supplied session is configured with applies.

    Args:
        session: The HTTP client session used to issue the request.
        media_url: The avatar URL to request, normally of the form
            ``mxc://<server_name>/<media_id>``. A value without the
            ``mxc://`` prefix is passed through unchanged.
        base_url: Scheme, host and port of the content scanner under test.
            Defaults to the address the script has always used.

    Returns:
        The HTTP status code of the scanner's response.
    """
    media_id = media_url.removeprefix("mxc://")
    # Tolerate a trailing slash on base_url so we never emit "//_matrix/...".
    endpoint = f"{base_url.rstrip('/')}/_matrix/media_proxy/unstable/download/{media_id}"

    async with session.get(endpoint) as response:
        await response.read()
        if "-v" not in sys.argv:
            # Simple progress meter; suppressed in verbose mode so it does not
            # interleave with the scanner's own log output.
            print(".", end="", flush=True)

    return response.status
| 231 | + |
| 232 | + |
async def main() -> None:
    """Start the content scanner, run the benchmark twice, then stop the scanner.

    The scanner is launched as a subprocess with this script's directory as
    its working directory, using the ``config.yaml`` found there. Its stdout
    and stderr are discarded unless ``-v`` is present on the command line.
    The scanner is always terminated on the way out, even if a test run
    raises.
    """
    # Use an absolute path so the directory is still usable as the child's
    # cwd if __file__ happens to be a bare relative filename.
    perfdir = os.path.dirname(os.path.abspath(__file__))
    # NOTE(review): presumably config.yaml points the scanner at this
    # directory for its temporary files -- confirm against config.yaml.
    os.makedirs(os.path.join(perfdir, "temp"), exist_ok=True)

    print(f"number of URLs: {len(AVATAR_URLS_TAKEN_FROM_SYNAPSE_DEV)}")

    # Scanner logs are only shown in verbose mode.
    child_output = None if "-v" in sys.argv else subprocess.DEVNULL

    server = None
    try:
        server = subprocess.Popen(
            args=[
                sys.executable,
                "-m",
                "matrix_content_scanner.mcs",
                "-c",
                "config.yaml",
            ],
            cwd=perfdir,
            stdin=subprocess.DEVNULL,
            stdout=child_output,
            stderr=child_output,
        )

        # Give server time to startup. This is a fixed delay, not a readiness
        # check. Use the asyncio sleep so the event loop is not blocked.
        await asyncio.sleep(0.5)

        await run_test()
        # Run test a second time, now that caches have warmed up
        await run_test()
    finally:
        if server is not None:
            server.terminate()
            # Reap the child so that returncode is actually populated; without
            # a wait() it would still be None when printed below.
            try:
                server.wait(timeout=10)
            except subprocess.TimeoutExpired:
                # The scanner ignored SIGTERM; force it down.
                server.kill()
                server.wait()
            print("Server return code:", server.returncode)
| 265 | + |
| 266 | + |
async def run_test() -> None:
    """Request every avatar URL concurrently and report the results.

    One request per entry in ``AVATAR_URLS_TAKEN_FROM_SYNAPSE_DEV`` is
    scheduled on a shared client session. Once all have completed, a tally of
    the HTTP status codes is printed. Any exception is printed as a traceback
    and marks the run as failed. The elapsed wall-clock time is always
    printed, whether or not the run succeeded.
    """
    outcome = "Succeeded"
    started_at = timer()
    try:
        async with aiohttp.ClientSession() as http:
            # Schedule everything up front so the requests run concurrently.
            in_flight = [
                asyncio.ensure_future(request_media(http, avatar_url))
                for avatar_url in AVATAR_URLS_TAKEN_FROM_SYNAPSE_DEV
            ]

            status_codes = await asyncio.gather(*in_flight)
            # Blank print ends the line of progress dots.
            print()
            print(
                "Status codes from scanner server:",
                collections.Counter(status_codes),
            )
    except Exception:
        traceback.print_exc()
        outcome = "Failed"
    finally:
        elapsed = timer() - started_at
        print(f"{outcome} in {elapsed:.2f}s")
| 286 | + |
| 287 | + |
# Script entry point: run the end-to-end test on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())
0 commit comments