From b2b36f0e401f8f2bb2a805b3a812afa46582499c Mon Sep 17 00:00:00 2001 From: RedYetiDev <38299977+RedYetiDev@users.noreply.github.com> Date: Wed, 29 May 2024 08:18:37 -0400 Subject: [PATCH] remove metrics data page --- .../resources/config/nodejs.org | 10 - .../tools/metrics/country-lookup.py | 37 -- .../tools/metrics/download-counts.awk | 143 ------- .../tools/metrics/download-counts.sh | 100 ----- .../tools/metrics/download-summaries.awk | 129 ------ ansible/www-standalone/tools/metrics/plot.gp | 97 ----- .../tools/metrics/public-index.html | 386 ------------------ .../tools/metrics/public-index.md | 104 ----- .../tools/metrics/strip-travis.pl | 13 - .../tools/metrics/summary-arch.awk | 41 -- .../tools/metrics/summary-country.awk | 41 -- .../tools/metrics/summary-os.awk | 41 -- .../tools/metrics/summary-total.awk | 18 - .../tools/metrics/summary-version.awk | 51 --- .../tools/metrics/update-geoipdb.sh | 3 - 15 files changed, 1214 deletions(-) delete mode 100755 ansible/www-standalone/tools/metrics/country-lookup.py delete mode 100644 ansible/www-standalone/tools/metrics/download-counts.awk delete mode 100755 ansible/www-standalone/tools/metrics/download-counts.sh delete mode 100644 ansible/www-standalone/tools/metrics/download-summaries.awk delete mode 100644 ansible/www-standalone/tools/metrics/plot.gp delete mode 100644 ansible/www-standalone/tools/metrics/public-index.html delete mode 100644 ansible/www-standalone/tools/metrics/public-index.md delete mode 100755 ansible/www-standalone/tools/metrics/strip-travis.pl delete mode 100644 ansible/www-standalone/tools/metrics/summary-arch.awk delete mode 100644 ansible/www-standalone/tools/metrics/summary-country.awk delete mode 100644 ansible/www-standalone/tools/metrics/summary-os.awk delete mode 100644 ansible/www-standalone/tools/metrics/summary-total.awk delete mode 100644 ansible/www-standalone/tools/metrics/summary-version.awk delete mode 100755 ansible/www-standalone/tools/metrics/update-geoipdb.sh diff --git 
a/ansible/www-standalone/resources/config/nodejs.org b/ansible/www-standalone/resources/config/nodejs.org index 1e7db28f9..86ad86810 100644 --- a/ansible/www-standalone/resources/config/nodejs.org +++ b/ansible/www-standalone/resources/config/nodejs.org @@ -251,16 +251,6 @@ server { rewrite ^/documentation/api(.*)$ /api$1 redirect; } - # This Location is used for our Metrics Pages - # We keep `autoindex` on as some folders are autoindex (do not have an index.html) - # even tho the main /metric endpoint is a build page (statically generated over time) - # We use ^~ to tell NGINX to not process any other Location directive or Rewrite after this match - location ^~ /metrics { - alias /home/dist/metrics; - autoindex on; - default_type text/plain; - } - # When a website 404 occurs, attempt to load the English version of the page # if the request was for a localised page. # Also, store the original language of the request if it was localised diff --git a/ansible/www-standalone/tools/metrics/country-lookup.py b/ansible/www-standalone/tools/metrics/country-lookup.py deleted file mode 100755 index b7bb25d3c..000000000 --- a/ansible/www-standalone/tools/metrics/country-lookup.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python - -import sys -import csv -import geoip2.database -import os - -reader = geoip2.database.Reader(os.path.dirname(os.path.realpath(__file__)) + '/GeoLite2-City.mmdb') - -logFileWriter = csv.writer(sys.stdout, quoting=csv.QUOTE_MINIMAL) -logFileReader = csv.reader(sys.stdin) - -for row in logFileReader: - if row[0] == "ip": - row.pop(0) - row.insert(1, "country") - row.insert(2, "region") - logFileWriter.writerow(row) - continue - - country = "" - region = "" - - try: - georec = reader.city(row.pop(0)) - if georec: - if georec.country.iso_code: - country = georec.country.iso_code - if georec.subdivisions.most_specific.iso_code: - region = georec.subdivisions.most_specific.iso_code - except Exception: - pass - - row.insert(1, country.encode('utf-8')) 
- row.insert(2, region.encode('utf-8')) - - logFileWriter.writerow(row) diff --git a/ansible/www-standalone/tools/metrics/download-counts.awk b/ansible/www-standalone/tools/metrics/download-counts.awk deleted file mode 100644 index 26d5294de..000000000 --- a/ansible/www-standalone/tools/metrics/download-counts.awk +++ /dev/null @@ -1,143 +0,0 @@ -BEGIN { - # Utility array to convert logfile month names to month numbers - split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", month) - for (i in month) { - month_nums[month[i]] = (i / 1 >= 10 ? "" : "0") i - } - fmt = "%s,%s,%s,%s,%s,%s,%s\n" - printf fmt, "ip", "day", "path", "version", "os", "arch", "bytes" -} - -(!($6 == "\"GET" || $6 == "GET")) { next } # non-GET requests - -($10 < 1000) { next } # unreasonably small download - -!/\.(tar\.gz|tar\.xz|pkg|msi|exe|zip|7z)(\?[^ ]+)? HTTP\/[12]\.[10][" ]/ { - #print "Skipping:", $0 - #check we're not missing anything with: grep Skipping /tmp/_var_log_nginx_nodejs.org-access.log | awk '{print $10 $8}'| grep -v '/$\|html$\|png$\|svg$\|json\|jpg$\|xml$\|txt$\|jar$\|js$\|pom$\|css$\|ico$\|zip$\|lib$\|exp$\|^40\|^30\|tab$\|eps$\|asc$\|gpg$\|pdf$\|tgz$\|\?\|\#\|pdb$\|rtf$\|md$\|SHASUMS' - next -} - -{ gsub("\"", "", $9) } - -($9 < 200 || $9 > 300) { next } # status code not ~200 - -{ - success = match($0, \ - / \[([^:]+).* "?GET (\/+(dist|download\/+release)\/+(node-latest\.tar\.gz|([^/]+)\/+((win-x64|win-x86|win-arm64|x64)?\/+?node\.exe|(x64\/)?node-+(v[0-9\.]+)[\-\.]([^\? ]+))))(\?[^ ]+)? HTTP\/[12]\.[01][" ]/ \ - , m \ - ) - - if (success) { - date = m[1] - day = substr(date, 8, 4) "-" month_nums[substr(date, 4, 3)] "-" substr(date, 1, 2) - - path = m[2] - #m[3]=root - #latestSrc = (m[4] == "node-latest.tar.gz") - pathVersion = m[5] - file = m[6] - winArch = m[7] - #m[8]=x64/? - - # version can come from the filename or the path, filename is best - # but it may not be there (e.g. 
node.exe) so fall back to path version - fileVersion = m[9] - if (match(fileVersion, /^v[0-9\.]+$/)) { - version = fileVersion - } else if (match(pathVersion, /^v[0-9\.]+$/)) { - version = pathVersion - } else { - version = "" - } - - fileType = m[10] - #m[11]=query string - #m[12]=ip including quotes - - if (match(fileType, /^headers\.tar\..z$/)) { - os = "headers" - } else if (match(fileType, /^linux-/)) { - os = "linux" - } else if (fileType == "pkg" || match(fileType, /^darwin-/)) { - os = "osx" - } else if (match(fileType, /^sunos-/)) { - os = "sunos" - } else if (match(fileType, /^aix-/)) { - os = "aix" - } else if (match(fileType, /msi$/) || match(file, /node\.exe$/) || match(fileType, /^win-/)) { - os = "win" - } else if (match(fileType, /^tar\..z$/) || match(path, /\/node-latest\.tar\.gz$/)) { - os = "src" - } else { - os = "" - } - - if (index(fileType, "x64") > 0 || fileType == "pkg") { - # .pkg for Node.js <= 0.12 were universal so may be used for either x64 or x86 - arch = "x64" - } else if (index(fileType, "x86") > 0) { - arch = "x86" - } else if (index(fileType, "armv6") > 0) { - arch = "armv6l" - } else if (index(fileType, "armv7") > 0) { # 4.1.0 had a misnamed binary, no "l" in "armv7l" - arch = "armv7l" - } else if (index(fileType, "arm64") > 0) { - arch = "arm64" - } else if (index(fileType, "ppc64le") > 0) { - arch = "ppc64le" - } else if (index(fileType, "ppc64") > 0) { - arch = "ppc64" - } else if (index(fileType, "s390x") > 0) { - arch = "s390x" - } else if (os == "win") { - # we get here for older .msi files and node.exe files - if (index(winArch, "x64") > 0) { - # could be "x64" or "win-x64" - arch = "x64" - } else { - # could be "win-x86" or "" - arch = "x86" - } - } else { - arch = "" - } - - bytes = $10 - - # IP address is tricky, it should be (but may not be in some logs) the last quoted field in the - # log but because of awk's greed regexes and unpredictable user-agent strings we can't build - # it into the main regex above, so we 
re-split the line by quotes and check the final string. - # The field comes from X-Forwarded-For and can contain any number of comma-separated (possibly - # with space characters too) IPv4 or IPv6 addresses, or "unknown" (non-standard but it shows - # up. To get the most likely usable IP address closest to the user we have to find the left-most - # valid, _public_ address in the list. - quotl = split($0, quots, "\"") - ipfield = quots[quotl - 1] - ipmatch = match(ipfield, /^(([0-9a-f:\.]+|unknown)([ ,])*)+$/) - if (ipmatch) { - gsub(/\s/, "", ipfield) # strip spaces - ipc = split(ipfield, ips, ",") - if (ipc == 1) { # just one address, yay - ip = ipfield - } else { - ip = "" - for (i = 1; i <= ipc; i++) { - # find first valid, non-private IP address - if (match(ips[i], /^[0-9a-f:\.]+$/) && !match(ips[i], /(^127\.)|(^192\.168\.)|(^10\.)|(^172\.1[6-9]\.)|(^172\.2[0-9]\.)|(^172\.3[0-1]\.)|(^::1$)|(^[fF][cCdD])/)) { - ip = ips[i] - break - } - } - } - } else if (ipfield == "-") { # probably old log file, use direct ip instead - ip = $1 - } else { - ip = "" - } - - printf fmt, ip, day, path, version, os, arch, bytes - #} else { - #print "WARNING: Could not parse line " NR " [" $0 "]" > "/dev/stderr" - } -} diff --git a/ansible/www-standalone/tools/metrics/download-counts.sh b/ansible/www-standalone/tools/metrics/download-counts.sh deleted file mode 100755 index 0444383f4..000000000 --- a/ansible/www-standalone/tools/metrics/download-counts.sh +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env bash - -set -e - -__dirname="$(CDPATH= cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -outputdir=/home/dist/metrics -logsoutputdir=${outputdir}/logs -summariesoutputdir=${outputdir}/summaries -summarytypes="arch country os total version" - -mkdir -p $logsoutputdir -for type in $summarytypes; do - mkdir -p ${summariesoutputdir}/${type} -done - -function catfile { - file=$1 - if [[ $i =~ \.xz ]]; then - xzcat $1 - elif [[ $i =~ \.gz ]]; then - zcat $1 - else - cat $1 - fi -} - - -function 
processfile { - basename=$1 - infile=$2 - outfile=$3 - tmpout="/tmp/~${basename}" - - echo "Processing ${infile} ..." - catfile $infile \ - | gawk -f ${__dirname}/download-counts.awk \ - | ${__dirname}/country-lookup.py \ - > $tmpout - echo "Moving data to ${outfile} ..." - mv $tmpout $outfile -} - - -function processsummaries { - basename=$1 - infile=$2 - tmpdir="/tmp/~${basename}.summary" - - echo "Processing summaries for ${infile} ..." - ( - mkdir -p $tmpdir && \ - cd $tmpdir && \ - gawk -f ${__dirname}/download-summaries.awk $infile - ) - - for type in $summarytypes; do - mv ${tmpdir}/${type}.csv ${summariesoutputdir}/${type}/${basename}.csv - done - rm -rf $tmpdir -} - - -echo "Processing log files ..." - -for i in $(ls /var/log/nginx/nodejs*/nodejs.org-access.log* | grep -v orig); do - basename=$(basename $i | sed 's/\.[xg]z//') - basenameout=$basename - if [[ $i =~ nodejs-backup ]]; then - basenameout="${basename}b" - fi - outfile=${logsoutputdir}/${basenameout}.csv - summaryexists=true - - #if [ ! -f ${outfile} ]; then - if [[ $i =~ nodejs\.org-access\.log$ ]] || [ ! -f ${outfile} -a ! -f ${outfile}.gz ]; then - processfile $basename $i $outfile - summaryexists=false - fi - - for type in $summarytypes; do - if [ ! -f ${summariesoutputdir}/${type}/${basenameout}.csv ]; then - summaryexists=false - fi - done - - #summaryexists=true - if [ "$summaryexists" = "false" ]; then - processsummaries $basenameout $outfile - fi -done; - -echo "Processing final summaries ..." - -for type in $summarytypes; do - echo " ... $type" - gawk -f ${__dirname}/summary-${type}.awk ${summariesoutputdir}/${type}/*.csv > ${summariesoutputdir}/${type}.csv -done - -echo "Creating graphs ..." 
-gnuplot -e "inputdir='${summariesoutputdir}'; outputdir='${summariesoutputdir}'" ${__dirname}/plot.gp diff --git a/ansible/www-standalone/tools/metrics/download-summaries.awk b/ansible/www-standalone/tools/metrics/download-summaries.awk deleted file mode 100644 index 369932da5..000000000 --- a/ansible/www-standalone/tools/metrics/download-summaries.awk +++ /dev/null @@ -1,129 +0,0 @@ -BEGIN { - # proper CSV delimiting, including quoted fields containing commas - FPAT = "([^,]*)|(\"[^\"]*\")" -} - -# 1 ,2 ,3 ,4 ,5 ,6 ,7 ,8 -# day,country,region,path,version,os,arch,bytes -# 2015-12-13,US,VA,/dist/v0.10.33/node-v0.10.33-linux-x64.tar.gz,v0.10.33,linux,x64,5645609 - -# skip headers -/^day,/ { next } - -{ - day = $1 - totals[day]++ - country_names[$2]++ - countries[day][$2]++ - # take only the semver-minor for versions < 1 and only semver-major for >= 1 - if ($5 == "") { - version = "unknown" - } else { - gsub("v", "", $5) - split($5, versionSplit, ".") - if (versionSplit[1] == 0) { - version = "0." versionSplit[2] - } else { - version = versionSplit[1] - } - } - version_names[version] = 1 - versions[day][version]++ - os_names[$6] = 1 - oss[day][$6]++ - arch_names[$7] = 1 - archs[day][$7]++ - bytes[day] += $8 / 1024 / 1024 -} - - -# sort versions by semver, i.e. 1 < 0.10 < 0.1 -function version_sort (i1, v1, i2, v2) { - v1 = v1 * (match(v1, /^0.[0-9]$/) ? 10 : 100) - v2 = v2 * (match(v2, /^0.[0-9]$/) ? 
10 : 100) - if (v1 < v2) return -1 - if (v1 > v2) return 1 - return 0 -} - - -END { - len = 0 - for (day in versions) { - odays[++len] = day - } - asort(odays) - - PROCINFO["sorted_in"] = "@val_num_desc" - clen = 0 - for (country in country_names) { - ocountries[++clen] = country - #if (i++ >= top_countries) break - } - delete PROCINFO["sorted_in"] - - vlen = 0 - for (version in version_names) { - oversions[++vlen] = version - } - asort(oversions, oversions, "version_sort") - - olen = 0 - for (os in os_names) { - ooss[++olen] = os - } - asort(ooss) - - alen = 0 - for (arch in arch_names) { - oarchs[++alen] = arch - } - asort(oarchs) - - printf "day" > "country.csv" - printf "day" > "version.csv" - printf "day" > "os.csv" - printf "day" > "arch.csv" - printf "day,downloads,GiB\n" > "total.csv" - for (i = 1; i <= clen; i++) { - printf ",%s", ocountries[i] ? ocountries[i] : "unknown" > "country.csv" - } - for (i = 1; i <= vlen; i++) { - printf ",%s", oversions[i] > "version.csv" - } - for (i = 1; i <= olen; i++) { - printf ",%s", ooss[i] ? ooss[i] : "unknown" > "os.csv" - } - for (i = 1; i <= alen; i++) { - printf ",%s", oarchs[i] ? 
oarchs[i] : "unknown" > "arch.csv" - } - printf "\n" > "country.csv" - printf "\n" > "version.csv" - printf "\n" > "os.csv" - printf "\n" > "arch.csv" - for (i = 1; i <= len; i++) { - day = odays[i] - printf "%s", day > "country.csv" - printf "%s", day > "version.csv" - printf "%s", day > "os.csv" - printf "%s", day > "arch.csv" - printf "%s,%s,%s\n", day, totals[day], bytes[day] / 1024 > "total.csv" - - for (j = 1; j <= clen; j++) { - printf ",%s", countries[day][ocountries[j]] > "country.csv" - } - for (j = 1; j <= vlen; j++) { - printf ",%s", versions[day][oversions[j]] > "version.csv" - } - for (j = 1; j <= olen; j++) { - printf ",%s", oss[day][ooss[j]] > "os.csv" - } - for (j = 1; j <= alen; j++) { - printf ",%s", archs[day][oarchs[j]] > "arch.csv" - } - printf "\n" > "country.csv" - printf "\n" > "version.csv" - printf "\n" > "os.csv" - printf "\n" > "arch.csv" - } -} diff --git a/ansible/www-standalone/tools/metrics/plot.gp b/ansible/www-standalone/tools/metrics/plot.gp deleted file mode 100644 index a715c4a11..000000000 --- a/ansible/www-standalone/tools/metrics/plot.gp +++ /dev/null @@ -1,97 +0,0 @@ -# requires "inputdir" and "outputfile" - -sourcename = "nodejs.org" -countrycolumns = system("awk -F, 'NR == 1 { print NF; exit }' " . inputdir . "/country.csv") -versioncolumns = system("awk -F, 'NR == 1 { print NF; exit }' " . inputdir . "/version.csv") -archcolumns = system("awk -F, 'NR == 1 { print NF; exit }' " . inputdir . "/arch.csv") -oscolumns = system("awk -F, 'NR == 1 { print NF; exit }' " . inputdir . 
"/os.csv") - -set macros -LABELSETTINGS = "at graph 0.5,0.96 center font 'Ubuntu Mono,14' textcolor rgb \"#9999a5\"" - -set linetype 1 lc rgb "#444444" lw 1 -set linetype 2 lc rgb "#E41A1C" lw 1 -set linetype 3 lc rgb "#377EB8" lw 1 -set linetype 4 lc rgb "#4DAF4A" lw 1 -set linetype 5 lc rgb "#984EA3" lw 1 -set linetype 6 lc rgb "#FF7F00" lw 1 -set linetype 7 lc rgb "#D0D033" lw 1 -set linetype 8 lc rgb "#A65628" lw 1 -set linetype 9 lc rgb "#F781BF" lw 1 -set linetype 10 lc rgb "#93F41D" lw 1 -set linetype 11 lc rgb "#79FFDB" lw 1 -set linetype 12 lc rgb "#F213EA" lw 1 -set linetype cycle 1 - -set datafile sep "," - -countrysums = "" -do for [i = 2:countrycolumns] { - stats inputdir . "/country.csv" using i nooutput - countrysums = countrysums . " " . int(STATS_sum) -} -versionsums = "" -do for [i = 2:versioncolumns] { - stats inputdir . "/version.csv" using i nooutput - versionsums = versionsums . " " . int(STATS_sum) -} -ossums = "" -do for [i = 2:oscolumns] { - stats inputdir . "/os.csv" using i nooutput - ossums = ossums . " " . int(STATS_sum) -} -archsums = "" -do for [i = 2:archcolumns] { - stats inputdir . "/arch.csv" using i nooutput - archsums = archsums . " " . int(STATS_sum) -} -stats inputdir . "/total.csv" using 2 nooutput -totalsum = int(STATS_sum) -stats inputdir . "/total.csv" using 3 nooutput -totalbytes = int(STATS_sum) - -set timefmt "%Y-%m-%d" -set format x "%b-%Y" -set xdata time -set style data lines -set key tc variable - -#set xrange ["2015-01-01" < * :] -#set yrange [0 < * < 0:25000] - -set border lc rgb "#dddde5" -set xtics textcolor rgb "#9999a5" -set ytics textcolor rgb "#9999a5" - -set term pngcairo size 2000,800 font "Ubuntu Mono,11" background rgb "#fefeff" - -set key left Left maxcols 1 maxrows 14 - -set output outputdir . "/country.png" -set label 1 "Top Countries (" . sourcename . ")" @LABELSETTINGS -plot for [i=2:13] inputdir . "/country.csv" every ::1 \ - using 1:i title sprintf(system("head -1 " . inputdir . 
"/country.csv | awk -F, '{ print $".i." }'")." / day (total: %'dk)", word(countrysums, i-1) / 1000) lw 2 - -set output outputdir . "/version.png" -set label 1 "Node.js Versions (" . sourcename . ")" @LABELSETTINGS -plot for [i=2:versioncolumns] inputdir . "/version.csv" every ::1 \ - using 1:i title sprintf(system("head -1 " . inputdir . "/version.csv | awk -F, '{ print $".i." }'")." / day (total: %'dk)", word(versionsums, i-1) / 1000) lw 2 - -set output outputdir . "/os.png" -set label 1 "Operating Systems (" . sourcename . ")" @LABELSETTINGS -plot for [i=2:oscolumns] inputdir . "/os.csv" every ::1 \ - using 1:i title sprintf(system("head -1 " . inputdir . "/os.csv | awk -F, '{ print $".i." }'")." / day (total: %'dk)", word(ossums, i-1) / 1000) lw 2 - -set output outputdir . "/arch.png" -set label 1 "Architectures (" . sourcename . ")" @LABELSETTINGS -plot for [i=2:archcolumns] inputdir . "/arch.csv" every ::1 \ - using 1:i title sprintf(system("head -1 " . inputdir . "/arch.csv | awk -F, '{ print $".i." }'")." / day (total: %'dk)", word(archsums, i-1) / 1000) lw 2 - -set output outputdir . "/total.png" -set ytics textcolor rgb "#E41A1C" -set y2tics textcolor rgb "#377EB8" -set label 1 "Total downloads (" . sourcename . ")" @LABELSETTINGS -plot inputdir . "/total.csv" every ::1 \ - using 1:2 title sprintf("Downloads per day (total: %'dk)", totalsum / 1000) lw 2, \ - "" using 1:($3 * 1024) title sprintf("MiB per day (total: %'d TiB)", totalbytes) lw 2 axes x1y2 - diff --git a/ansible/www-standalone/tools/metrics/public-index.html b/ansible/www-standalone/tools/metrics/public-index.html deleted file mode 100644 index 569e0c0ac..000000000 --- a/ansible/www-standalone/tools/metrics/public-index.html +++ /dev/null @@ -1,386 +0,0 @@ - - - - - - - - - - - - - - -
-

nodejs.org Download Metrics

-

For "current" metrics go here. Everything below is just historic data

-

This directory contains anonymized log records for binary and source downloads of Node.js from nodejs.org.

-

What data is available?

-

There is roughly one log file per day, starting on the 14th of May 2014 to the current day. There is a gap in the data from the 1st to the 21st of September 2015 due to a server configuration error.

-

The data is gleaned from the access logs by matching for known binary and source files in /dist and the newer /download/release/ which is where the /dist/ directory also points. No other access log information is included in the data available here.

-

IP addresses and exact times are not reported, only days and geolocation data for the original IP addresses.

-

What format is the data in?

-

Raw log files are available in the ./logs/ sub-directory where each file's name takes the form: nodejs.org-access.log.YYYYMMDD.TTTTTTTTTT.csv, where the last entry in the file is used to create the string YYYYMMDD from the year, month and day of the month respectively and TTTTTTTTTT as the unix epoch timestamp. There may be zero, one or two log files for a given day. However, when stitched together they should form a continuous record of the downloads from nodejs.org.

-

There is always a nodejs.org-access.log.csv file which represents the current day's data and is not final, i.e. it will change from update to update, either appending new data or starting again for a new day. The other log files can be considered final until we decide to adjust the format at some point in the future.

-

The raw log files are comma-separated value format with the following columns: day, country, region, path, version, os, arch, bytes.

-

Country and region are calculated by using MaxMind's GeoLite2 City database and some entries may contain blank values where the look-up fails. X-Forwarded-For headers are used to determine the most likely origin IP address by parsing out the leftmost non-private address.

-

The path field contains the actual path that was requested by the client, with the version, os and arch columns calculated from this value.

-

The version field is occasionally empty due to the availability of node-latest.tar.gz which is a symlink to the latest source tarball and the various latest directory symlinks when used to download node.exe (versions could be roughly calculated for all of these when matched with release dates if desired).

-

The os field contains operating system identifiers as well as src for source tarballs and headers for header tarballs.

-

The arch field is blank when os is src or headers.

-

Pre-processed summary data

-

A set of pre-processed summary data is also made available in the ./summaries/ sub-directory. Each type of summary consists of:

- -

Total

-

Contains two data columns: downloads and TiB, where TiB is $2^{40}$ bytes.

-

Source data: ./summaries/total/

-

Aggregate data: ./summaries/total.csv

-

Plot:

-

-

Architectures

-

Contains a data column per distributed architecture, including unknown where the architecture cannot be determined (source or header tarballs). The columns are ordered by totals where the architecture that has the highest total is listed first and so on, the column ordering may therefore change over time. The list is not fixed and may expand when additional architectures are distributed from nodejs.org. The .pkg OS X installers are counted as x64 even though, prior to Node.js v4, they were "universal binaries" containing both x64 and x86 versions, usable on both architectures.

-

Source data: ./summaries/arch/

-

Aggregate data: ./summaries/arch.csv

-

Plot:

-

-

Countries

-

Contains a data column per country from the geolocation data, including unknown where a country could not be determined. The column names take the form of ISO 3166 country codes. The columns are ordered by totals where the country with the highest total is listed first and so on, the column ordering may therefore change over time. The list is not fixed and may expand if additional countries not already listed are discovered via geolocation.

-

Source data: ./summaries/country/

-

Aggregate data: ./summaries/country.csv

-

Plot:

-

-

Operating Systems

-

Contains a data column per distributed operating system, including unknown where the operating system cannot be determined (due to node-latest.tar.gz), src for source tarballs and headers for header tarballs. The columns are ordered by totals where the operating system that has the highest total is listed first and so on, the column ordering may therefore change over time. The list is not fixed and may expand when additional operating systems are distributed from nodejs.org.

-

Source data: ./summaries/os/

-

Aggregate data: ./summaries/os.csv

-

Plot:

-

-

Versions

-

Contains a data column per significant version number of Node.js. For <= 0.12, the semver-minor version number is listed, for >= 4.x the semver-major version number is listed. The unknown column contains counts of downloads where the version number could not be determined (see above note about node-latest.tar.gz and the latest directory symlinks coupled with node.exe). The columns are ordered by totals where the version that has the highest total is listed first and so on, the column ordering may therefore change over time. The list is not fixed and will expand when additional significant Node.js versions are made available for download.

-

Source data: ./summaries/version/

-

Aggregate data: ./summaries/version.csv

-

Plot:

-

-

Additional stuff

-

The source of this file along with the various scripts used to generate the data files and graphs can be found in the nodejs/build GitHub repository in the ansible/www-standalone/tools/metrics directory. Questions, suggestions and pull requests are welcome in that repository.

-
-
- - diff --git a/ansible/www-standalone/tools/metrics/public-index.md b/ansible/www-standalone/tools/metrics/public-index.md deleted file mode 100644 index 2d86945ea..000000000 --- a/ansible/www-standalone/tools/metrics/public-index.md +++ /dev/null @@ -1,104 +0,0 @@ -# nodejs.org Download Metrics - -This directory contains anonymized log records for binary and source downloads of Node.js from nodejs.org. - -## What data is available? - -There is roughly one log file per day, starting on the 14th of May 2014 to the current day. There is a gap in the data from the 1st to the 21st of September 2015 due to a server configuration error. - -The data is gleaned from the access logs by matching for known binary and source files in [/dist](https://nodejs.org/dist/) and the newer [/download/release/](https://nodejs.org/download/release) which is where the /dist/ directory also points. No other access log information is included in the data available here. - -IP addresses and exact times are not reported, only days and geolocation data for the original IP addresses. - -## What format is the data in? - -Raw log files are available in the **[./logs/](./logs/)** sub-directory where each file's name takes the form: `nodejs.org-access.log.YYYYMMDD.TTTTTTTTTT.csv`, where the last entry in the file is used to create the string `YYYYMMDD` from the year, month and day of the month respectively and `TTTTTTTTTT` as the unix epoch timestamp. There may zero, one or two log files for a given day. However, when stitched together they should form a continuous record of the downloads from nodejs.org. - -There is always a [nodejs.org-access.log.csv](./logs/nodejs.org-access.log.csv) file which represents the _current day's_ data and **is not final**, i.e. it will change from update to update, either appending new data or starting again for a new day. The other log files can be considered final until we decide to adjust the format at some point in the future. 
- -The raw log files are comma-separated value format with the following columns: day, country, region, path, version, os, arch, bytes. - -***Country*** and ***region*** are calculated by using MaxMind's [GeoLite2 City](http://dev.maxmind.com/geoip/geoip2/geolite2/) database and some entries may contain blank values where the look-up fails. `X-Forwarded-For` headers are used to determine the most likely origin IP address by parsing out the [leftmost non-private address](https://r.va.gg/2011/07/wrangling-the-x-forwarded-for-header.html). - -The ***path*** field contains the actual path that was requested by the client, with the ***version***, ***os*** and ***arch*** columns calculated from this value. - -The ***version*** field is occasionaly empty due to the availability of `node-latest.tar.gz` which is a symlink to the latest source tarball and the various `latest` directory symlinks when used to download `node.exe` (versions could be roughly calculated for all of these when matched with release dates if desired). - -The **os** field contains operating system identifiers as well as `src` for source tarballs and `headers` for header tarballs. - -The **arch** field is blank when **os** is `src` or `headers`. - - -## Pre-processed summary data - -A set of pre-processed summary data is also made available in the **[./summaries/](./summaries/)** sub-directory. Each type of summary consists of: - - * A directory containing CSV files with names matching the raw log file names and rows containing aggregated per-day data for the given summary datatype. Most of these files contain two rows, for two days, as the raw log files don't span neatly across day boundaries. - * An aggregation file, in CSV format, where each row is a single day during the full period for which there is available data. - * A PNG file with a simple plot of the data. - -### Total - -Contains two data columns: ***downloads*** and ***TiB***, where TiB is 240 bytes. 
- -Source data: [./summaries/total/](./summaries/total/) - -Aggregate data: [./summaries/total.csv](./summaries/total.csv) - -Plot: - - - -### Architectures - -Contains a data column per distributed architecture, including ***unknown*** where the architecture cannot be determined (source or header tarballs). The columns are ordered by totals where the architecture that has the highest total is listed first and so on, the column ordering may therefore change over time. The list is not fixed and may expand when additional architectures are distributed from nodejs.org. The **.pkg** OS X installers are counted as ***x64*** even though, prior to Node.js v4, they were "universal binaries" containing both x64 and x86 versions, usable on both architectures. - -Source data: [./summaries/arch/](./summaries/arch/) - -Aggregate data: [./summaries/arch.csv](./summaries/arch.csv) - -Plot: - - - -### Countries - -Contains a data column per country from the geolocation data, including ***unknown*** where a country could not be determined. The column names take the form of [ISO 3166](https://en.wikipedia.org/wiki/ISO_3166) country codes. The columns are ordered by totals where the country with the highest total is listed first and so on, the column ordering may therefore change over time. The list is not fixed and may expand if additional countries not already listed are discovered via geolocation. - -Source data: [./summaries/country/](./summaries/country/) - -Aggregate data: [./summaries/country.csv](./summaries/country.csv) - -Plot: - - - -### Operating Systems - -Contains a data column per distributed operating system, including ***unknown*** where the operating system cannot be determined (due to `node-latest.tar.gz`), ***src*** for source tarballs and ***headers*** for header tarballs. The columns are ordered by totals where the operating system that has the highest total is listed first and so on, the column ordering may therefore change over time. 
The list is not fixed and may expand when additional operating systems are distributed from nodejs.org. - -Source data: [./summaries/os/](./summaries/os/) - -Aggregate data: [./summaries/os.csv](./summaries/os.csv) - -Plot: - - - -### Versions - -Contains a data column per significant version number of Node.js. For <= 0.12, the semver-minor version number is listed, for >= 4.x the semver-major version number is listed. The ***unknown*** column contains counts of downloads where the version number could not be determined (see above note about `node-latest.tar.gz` and the `latest` directory symlinks coupled with `node.exe`). The columns are ordered by totals where the version that has the highest total is listed first and so on, the column ordering may therefore change over time. The list is not fixed and will expand when additional significant Node.js versions are made available for download. - -Source data: [./summaries/version/](./summaries/version/) - -Aggregate data: [./summaries/version.csv](./summaries/version.csv) - -Plot: - - - -## Additional stuff - -The source of this file along with the various scripts used to generate the data files and graphs can be found in the [nodejs/build](https://github.com/nodejs/build) GitHub repository in the [setup/www/tools/metrics](https://github.com/nodejs/build/tree/main/setup/www/tools/metrics) directory. Questions, suggestions and pull requests are welcome in that repository. 
- - diff --git a/ansible/www-standalone/tools/metrics/strip-travis.pl b/ansible/www-standalone/tools/metrics/strip-travis.pl deleted file mode 100755 index 9af0bf566..000000000 --- a/ansible/www-standalone/tools/metrics/strip-travis.pl +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env perl -use strict; -use warnings; -use List::Util qw(any); - -my $digcmd = "dig +short nat.travisci.net"; -my @travisips = `$digcmd`; -chomp @travisips; - -while (<>) { - my $ipaddr = (split(/,/, $_))[0]; - print if !(any { $_ eq $ipaddr } @travisips); -} diff --git a/ansible/www-standalone/tools/metrics/summary-arch.awk b/ansible/www-standalone/tools/metrics/summary-arch.awk deleted file mode 100644 index c77eb1e6f..000000000 --- a/ansible/www-standalone/tools/metrics/summary-arch.awk +++ /dev/null @@ -1,41 +0,0 @@ -BEGIN { - FS = "," -} - -/^day,/ { - for (i = 2; i <= NF; i++) { - arch_name[i] = $i - } - next -} - -{ - for (i = 2; i <= NF; i++) { - arch[$1][arch_name[i]] += $i - archs[arch_name[i]] += $i - } -} - -END { - len = 0 - PROCINFO["sorted_in"] = "@val_num_desc" - for (a in archs) { - oarchs[++len] = a - } - delete PROCINFO["sorted_in"] - - printf "day" - for (i = 1; i <= len; i++) { - printf ",%s", oarchs[i] - } - printf "\n" - - PROCINFO["sorted_in"] = "@ind_str_asc" - for (day in arch) { - printf "%s", day - for (i = 1; i <= len; i++) { - printf ",%s", arch[day][oarchs[i]] - } - printf "\n" - } -} diff --git a/ansible/www-standalone/tools/metrics/summary-country.awk b/ansible/www-standalone/tools/metrics/summary-country.awk deleted file mode 100644 index 7e9c7946f..000000000 --- a/ansible/www-standalone/tools/metrics/summary-country.awk +++ /dev/null @@ -1,41 +0,0 @@ -BEGIN { - FS = "," -} - -/^day,/ { - for (i = 2; i <= NF; i++) { - country_name[i] = $i - } - next -} - -{ - for (i = 2; i <= NF; i++) { - country[$1][country_name[i]] += $i - countries[country_name[i]] += $i - } -} - -END { - len = 0 - PROCINFO["sorted_in"] = "@val_num_desc" - for (c in countries) { - 
ocountries[++len] = c - } - delete PROCINFO["sorted_in"] - - printf "day" - for (i = 1; i <= len; i++) { - printf ",%s", ocountries[i] - } - printf "\n" - - PROCINFO["sorted_in"] = "@ind_str_asc" - for (day in country) { - printf "%s", day - for (i = 1; i <= len; i++) { - printf ",%s", country[day][ocountries[i]] - } - printf "\n" - } -} diff --git a/ansible/www-standalone/tools/metrics/summary-os.awk b/ansible/www-standalone/tools/metrics/summary-os.awk deleted file mode 100644 index c7029d803..000000000 --- a/ansible/www-standalone/tools/metrics/summary-os.awk +++ /dev/null @@ -1,41 +0,0 @@ -BEGIN { - FS = "," -} - -/^day,/ { - for (i = 2; i <= NF; i++) { - os_name[i] = $i - } - next -} - -{ - for (i = 2; i <= NF; i++) { - os[$1][os_name[i]] += $i - oss[os_name[i]] += $i - } -} - -END { - len = 0 - PROCINFO["sorted_in"] = "@val_num_desc" - for (a in oss) { - ooss[++len] = a - } - delete PROCINFO["sorted_in"] - - printf "day" - for (i = 1; i <= len; i++) { - printf ",%s", ooss[i] - } - printf "\n" - - PROCINFO["sorted_in"] = "@ind_str_asc" - for (day in os) { - printf "%s", day - for (i = 1; i <= len; i++) { - printf ",%s", os[day][ooss[i]] - } - printf "\n" - } -} diff --git a/ansible/www-standalone/tools/metrics/summary-total.awk b/ansible/www-standalone/tools/metrics/summary-total.awk deleted file mode 100644 index 2bb284d5a..000000000 --- a/ansible/www-standalone/tools/metrics/summary-total.awk +++ /dev/null @@ -1,18 +0,0 @@ -BEGIN { - FS = "," -} - -/^day,/ { next } - -{ - totals[$1] += $2 - bytes[$1] += $3 -} - -END { - PROCINFO["sorted_in"] = "@ind_str_asc" - print "day,downloads,TiB" - for (day in totals) { - printf "%s,%s,%s\n", day, totals[day], bytes[day] / 1024 - } -} diff --git a/ansible/www-standalone/tools/metrics/summary-version.awk b/ansible/www-standalone/tools/metrics/summary-version.awk deleted file mode 100644 index 28d87f837..000000000 --- a/ansible/www-standalone/tools/metrics/summary-version.awk +++ /dev/null @@ -1,51 +0,0 @@ -BEGIN { - FS 
= "," -} - -/^day,/ { - for (i = 2; i <= NF; i++) { - version_name[i] = $i - } - next -} - -{ - for (i = 2; i <= NF; i++) { - version[$1][version_name[i]] += $i - versions[version_name[i]] += $i - } -} - -# sort versions by semver, i.e. 1 < 0.10 < 0.1 -function version_sort (i1, v1, i2, v2) { - v1 = v1 * (match(v1, /^0.[0-9]$/) ? 10 : 100) - v2 = v2 * (match(v2, /^0.[0-9]$/) ? 10 : 100) - if (v1 < v2) return -1 - if (v1 > v2) return 1 - return 0 -} - -END { - len = 0 - PROCINFO["sorted_in"] = "@val_num_desc" - for (a in versions) { - oversions[++len] = a - } - delete PROCINFO["sorted_in"] - #asort(oversions, oversions, "version_sort") - - printf "day" - for (i = 1; i <= len; i++) { - printf ",%s", oversions[i] - } - printf "\n" - - PROCINFO["sorted_in"] = "@ind_str_asc" - for (day in version) { - printf "%s", day - for (i = 1; i <= len; i++) { - printf ",%s", version[day][oversions[i]] - } - printf "\n" - } -} diff --git a/ansible/www-standalone/tools/metrics/update-geoipdb.sh b/ansible/www-standalone/tools/metrics/update-geoipdb.sh deleted file mode 100755 index b7af17412..000000000 --- a/ansible/www-standalone/tools/metrics/update-geoipdb.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -curl -sL http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz | gunzip -c - > GeoLite2-City.mmdb