diff --git a/0x0-prune.service b/0x0-prune.service new file mode 100644 index 0000000..b28fb2d --- /dev/null +++ b/0x0-prune.service @@ -0,0 +1,22 @@ +[Unit] +Description=Prune 0x0 files +After=remote-fs.target + +[Service] +Type=oneshot +User=nullptr +WorkingDirectory=/path/to/0x0 +BindPaths=/path/to/0x0 + +Environment=FLASK_APP=fhost +ExecStart=/usr/bin/flask prune +ProtectProc=noaccess +ProtectSystem=strict +ProtectHome=tmpfs +PrivateTmp=true +PrivateUsers=true +ProtectKernelLogs=true +LockPersonality=true + +[Install] +WantedBy=multi-user.target diff --git a/0x0-prune.timer b/0x0-prune.timer new file mode 100644 index 0000000..df6a594 --- /dev/null +++ b/0x0-prune.timer @@ -0,0 +1,9 @@ +[Unit] +Description=Prune 0x0 files + +[Timer] +OnCalendar=hourly +Persistent=true + +[Install] +WantedBy=timers.target diff --git a/0x0-vscan.service b/0x0-vscan.service new file mode 100644 index 0000000..6a48b1c --- /dev/null +++ b/0x0-vscan.service @@ -0,0 +1,22 @@ +[Unit] +Description=Scan 0x0 files with ClamAV +After=remote-fs.target clamd.service + +[Service] +Type=oneshot +User=nullptr +WorkingDirectory=/path/to/0x0 +BindPaths=/path/to/0x0 + +Environment=FLASK_APP=fhost +ExecStart=/usr/bin/flask vscan +ProtectProc=noaccess +ProtectSystem=strict +ProtectHome=tmpfs +PrivateTmp=true +PrivateUsers=true +ProtectKernelLogs=true +LockPersonality=true + +[Install] +WantedBy=multi-user.target diff --git a/0x0-vscan.timer b/0x0-vscan.timer new file mode 100644 index 0000000..d2c6486 --- /dev/null +++ b/0x0-vscan.timer @@ -0,0 +1,9 @@ +[Unit] +Description=Scan 0x0 files with ClamAV + +[Timer] +OnCalendar=hourly +Persistent=true + +[Install] +WantedBy=timers.target diff --git a/README.rst b/README.rst index 0e1c1cc..8c512de 100644 --- a/README.rst +++ b/README.rst @@ -7,13 +7,13 @@ This is a no-bullshit file hosting and URL shortening service that also runs Configuration ------------- -To configure 0x0, create ``instance/config.py``. -The defaults are at the start of ``fhost.py``. 
To change them, -add them to ``instance/config.py``— for example:: +To configure 0x0, copy ``instance/config.example.py`` to ``instance/config.py``, then edit +it. Reasonable defaults are set, but there are a couple of options you'll need to change +before running 0x0 for the first time. - SQLALCHEMY_DATABASE_URI = "sqlite:///some/path/db.sqlite" - -For more information on instance configuration, see +By default, the configuration is stored in the Flask instance directory. +Normally, this is in ``./instance``, but it might be different for your system. +For details, see `the Flask documentation `_. To customize the home and error pages, simply create a ``templates`` directory @@ -35,10 +35,17 @@ downsides, one of them being that range requests will not work. This is a problem for example when streaming media files: It won’t be possible to seek, and some ISOBMFF (MP4) files will not play at all. -To make files expire, simply create a cronjob that runs ``cleanup.py`` every -now and then. +To make files expire, simply run ``FLASK_APP=fhost flask prune`` every +now and then. You can use the provided systemd unit files for this:: -Before running the service for the first time, run ``FLASK_APP=fhost flask db upgrade``. + 0x0-prune.service + 0x0-prune.timer + +Make sure to edit them to match your system configuration. In particular, +set the user and paths in ``0x0-prune.service``. + +Before running the service for the first time and every time you update it +from this git repository, run ``FLASK_APP=fhost flask db upgrade``. NSFW Detection @@ -49,7 +56,24 @@ neural network model. This works for images and video files and requires the following: * Caffe Python module (built for Python 3) -* ``ffmpegthumbnailer`` executable in ``$PATH`` +* `PyAV `_ + + +Virus Scanning +-------------- + +0x0 can scan its files with ClamAV’s daemon. As this can take a long time +for larger files, this does not happen immediately but instead every time +you run the ``vscan`` command. 
It is recommended to configure a systemd +timer or cronjob to do this periodically. Examples are included:: + + 0x0-vscan.service + 0x0-vscan.timer + +Remember to adjust your size limits in clamd.conf, including +``StreamMaxLength``! + +This feature requires the `clamd module `_. Network Security Considerations diff --git a/cleanup.py b/cleanup.py index 0f9a5ce..14fbc61 100755 --- a/cleanup.py +++ b/cleanup.py @@ -1,44 +1,8 @@ #!/usr/bin/env python3 -""" - Copyright © 2020 Mia Herkt - Licensed under the EUPL, Version 1.2 or - as soon as approved - by the European Commission - subsequent versions of the EUPL - (the "License"); - You may not use this work except in compliance with the License. - You may obtain a copy of the license at: - - https://joinup.ec.europa.eu/software/page/eupl - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - either express or implied. - See the License for the specific language governing permissions - and limitations under the License. 
-""" - -import os -import sys -import time -import datetime -from fhost import app - -os.chdir(os.path.dirname(sys.argv[0])) -os.chdir(app.config["FHOST_STORAGE_PATH"]) - -files = [f for f in os.listdir(".")] - -maxs = app.config["MAX_CONTENT_LENGTH"] -mind = 30 -maxd = 365 - -for f in files: - stat = os.stat(f) - systime = time.time() - age = datetime.timedelta(seconds=(systime - stat.st_mtime)).days - - maxage = mind + (-maxd + mind) * (stat.st_size / maxs - 1) ** 3 - - if age >= maxage: - os.remove(f) +print("This script has been replaced!!") +print("Instead, please run") +print("") +print(" $ FLASK_APP=fhost flask prune") +print("") +exit(1); diff --git a/fhost.py b/fhost.py index 2f53db0..dbf6057 100755 --- a/fhost.py +++ b/fhost.py @@ -22,13 +22,20 @@ from flask import Flask, abort, make_response, redirect, request, send_from_directory, url_for, Response, render_template from flask_sqlalchemy import SQLAlchemy from flask_migrate import Migrate +from sqlalchemy import and_, or_ from jinja2.exceptions import * from jinja2 import ChoiceLoader, FileSystemLoader from hashlib import sha256 from magic import Magic from mimetypes import guess_extension +import click +import os import sys +import time +import datetime +import typing import requests +import secrets from validators import url as url_valid from pathlib import Path @@ -42,6 +49,7 @@ app.config.update( FHOST_USE_X_ACCEL_REDIRECT = True, # expect nginx by default FHOST_STORAGE_PATH = "up", FHOST_MAX_EXT_LENGTH = 9, + FHOST_SECRET_BYTES = 16, FHOST_EXT_OVERRIDE = { "audio/flac" : ".flac", "image/gif" : ".gif", @@ -63,6 +71,13 @@ app.config.update( FHOST_UPLOAD_BLACKLIST = None, NSFW_DETECT = False, NSFW_THRESHOLD = 0.608, + VSCAN_SOCKET = None, + VSCAN_QUARANTINE_PATH = "quarantine", + VSCAN_IGNORE = [ + "Eicar-Test-Signature", + "PUA.Win.Packer.XmMusicFile", + ], + VSCAN_INTERVAL = datetime.timedelta(days=7), URL_ALPHABET = "DEQhd2uFteibPwq0SWBInTpA_jcZL5GKz3YCR14Ulk87Jors9vNHgfaOmMXy6Vx-", ) @@ -121,12 
+136,23 @@ class File(db.Model): addr = db.Column(db.UnicodeText) removed = db.Column(db.Boolean, default=False) nsfw_score = db.Column(db.Float) + expiration = db.Column(db.BigInteger) + mgmt_token = db.Column(db.String) + secret = db.Column(db.String) + last_vscan = db.Column(db.DateTime) + size = db.Column(db.BigInteger) - def __init__(self, sha256, ext, mime, addr): + def __init__(self, sha256, ext, mime, addr, expiration, mgmt_token): self.sha256 = sha256 self.ext = ext self.mime = mime self.addr = addr + self.expiration = expiration + self.mgmt_token = mgmt_token + + @property + def is_nsfw(self) -> bool: + return self.nsfw_score and self.nsfw_score > app.config["NSFW_THRESHOLD"] def getname(self): return u"{0}{1}".format(su.enbase(self.id), self.ext) @@ -134,12 +160,58 @@ class File(db.Model): def geturl(self): n = self.getname() - if self.nsfw_score and self.nsfw_score > app.config["NSFW_THRESHOLD"]: - return url_for("get", path=n, _external=True, _anchor="nsfw") + "\n" + if self.is_nsfw: + return url_for("get", path=n, secret=self.secret, _external=True, _anchor="nsfw") + "\n" else: - return url_for("get", path=n, _external=True) + "\n" + return url_for("get", path=n, secret=self.secret, _external=True) + "\n" - def store(file_, addr): + def getpath(self) -> Path: + return Path(app.config["FHOST_STORAGE_PATH"]) / self.sha256 + + def delete(self, permanent=False): + self.expiration = None + self.mgmt_token = None + self.removed = permanent + self.getpath().unlink(missing_ok=True) + + # Returns the epoch millisecond that a file should expire + # + # Uses the expiration time provided by the user (requested_expiration) + # upper-bounded by an algorithm that computes the size based on the size of the + # file. + # + # That is, all files are assigned a computed expiration, which can voluntarily + # shortened by the user either by providing a timestamp in epoch millis or a + # duration in hours. 
+ def get_expiration(requested_expiration, size) -> int: + current_epoch_millis = time.time() * 1000; + + # Maximum lifetime of the file in milliseconds + this_files_max_lifespan = get_max_lifespan(size); + + # The latest allowed expiration date for this file, in epoch millis + this_files_max_expiration = this_files_max_lifespan + 1000 * time.time(); + + if requested_expiration is None: + return this_files_max_expiration + elif requested_expiration < 1650460320000: + # Treat the requested expiration time as a duration in hours + requested_expiration_ms = requested_expiration * 60 * 60 * 1000 + return min(this_files_max_expiration, current_epoch_millis + requested_expiration_ms) + else: + # Treat the requested expiration time as a timestamp in epoch millis + return min(this_files_max_expiration, requested_expiration) + + """ + requested_expiration can be: + - None, to use the longest allowed file lifespan + - a duration (in hours) that the file should live for + - a timestamp in epoch millis that the file should expire at + + Any value greater that the longest allowed file lifespan will be rounded down to that + value. 
+ """ + def store(file_, requested_expiration: typing.Optional[int], addr, secret: bool): data = file_.read() digest = sha256(data).hexdigest() @@ -155,6 +227,9 @@ class File(db.Model): if mime in app.config["FHOST_MIME_BLACKLIST"] or guess in app.config["FHOST_MIME_BLACKLIST"]: abort(415) + if len(mime) > 128: + abort(400) + if mime.startswith("text/") and not "charset" in mime: mime += "; charset=utf-8" @@ -162,6 +237,8 @@ class File(db.Model): def get_ext(mime): ext = "".join(Path(file_.filename).suffixes[-2:]) + if len(ext) > app.config["FHOST_MAX_EXT_LENGTH"]: + ext = Path(file_.filename).suffixes[-1] gmime = mime.split(";")[0] guess = guess_extension(gmime) @@ -175,18 +252,38 @@ class File(db.Model): return ext[:app.config["FHOST_MAX_EXT_LENGTH"]] or ".bin" - f = File.query.filter_by(sha256=digest).first() + expiration = File.get_expiration(requested_expiration, len(data)) + isnew = True + f = File.query.filter_by(sha256=digest).first() if f: + # If the file already exists if f.removed: + # The file was removed by moderation, so don't accept it back abort(451) + if f.expiration is None: + # The file has expired, so give it a new expiration date + f.expiration = expiration + + # Also generate a new management token + f.mgmt_token = secrets.token_urlsafe() + else: + # The file already exists, update the expiration if needed + f.expiration = max(f.expiration, expiration) + isnew = False else: mime = get_mime() ext = get_ext(mime) - f = File(digest, ext, mime, addr) + mgmt_token = secrets.token_urlsafe() + f = File(digest, ext, mime, addr, expiration, mgmt_token) f.addr = addr + if isnew: + f.secret = None + if secret: + f.secret = secrets.token_urlsafe(app.config["FHOST_SECRET_BYTES"]) + storage = Path(app.config["FHOST_STORAGE_PATH"]) storage.mkdir(parents=True, exist_ok=True) p = storage / digest @@ -194,16 +291,15 @@ class File(db.Model): if not p.is_file(): with open(p, "wb") as of: of.write(data) - else: - p.touch() + + f.size = len(data) if not 
f.nsfw_score and app.config["NSFW_DETECT"]: f.nsfw_score = nsfw.detect(p) db.session.add(f) db.session.commit() - return f - + return f, isnew class UrlEncoder(object): @@ -260,15 +356,30 @@ def in_upload_bl(addr): return False -def store_file(f, addr): +""" +requested_expiration can be: + - None, to use the longest allowed file lifespan + - a duration (in hours) that the file should live for + - a timestamp in epoch millis that the file should expire at + +Any value greater that the longest allowed file lifespan will be rounded down to that +value. +""" +def store_file(f, requested_expiration: typing.Optional[int], addr, secret: bool): if in_upload_bl(addr): return "Your host is blocked from uploading files.\n", 451 - sf = File.store(f, addr) + sf, isnew = File.store(f, requested_expiration, addr, secret) - return sf.geturl() + response = make_response(sf.geturl()) + response.headers["X-Expires"] = sf.expiration -def store_url(url, addr): + if isnew: + response.headers["X-Token"] = sf.mgmt_token + + return response + +def store_url(url, addr, secret: bool): if is_fhost_url(url): abort(400) @@ -289,40 +400,81 @@ def store_url(url, addr): f = urlfile(read=r.raw.read, content_type=r.headers["content-type"], filename="") - return store_file(f, addr) + return store_file(f, None, addr, secret) else: abort(413) else: abort(411) -@app.route("/") -def get(path): - path = Path(path.split("/", 1)[0]) - sufs = "".join(path.suffixes[-2:]) - name = path.name[:-len(sufs) or None] +def manage_file(f): + try: + assert(request.form["token"] == f.mgmt_token) + except: + abort(401) + + if "delete" in request.form: + f.delete() + db.session.commit() + return "" + if "expires" in request.form: + try: + requested_expiration = int(request.form["expires"]) + except ValueError: + abort(400) + + f.expiration = File.get_expiration(requested_expiration, f.size) + db.session.commit() + return "", 202 + + abort(400) + +@app.route("/", methods=["GET", "POST"]) +@app.route("/s//", methods=["GET", 
"POST"]) +def get(path, secret=None): + p = Path(path.split("/", 1)[0]) + sufs = "".join(p.suffixes[-2:]) + name = p.name[:-len(sufs) or None] + + if "." in name: + abort(404) + id = su.debase(name) if sufs: f = File.query.get(id) if f and f.ext == sufs: + if f.secret != secret: + abort(404) + if f.removed: abort(451) - fpath = Path(app.config["FHOST_STORAGE_PATH"]) / f.sha256 + fpath = f.getpath() if not fpath.is_file(): abort(404) + if request.method == "POST": + return manage_file(f) + if app.config["FHOST_USE_X_ACCEL_REDIRECT"]: response = make_response() response.headers["Content-Type"] = f.mime - response.headers["Content-Length"] = fpath.stat().st_size + response.headers["Content-Length"] = f.size response.headers["X-Accel-Redirect"] = "/" + str(fpath) - return response else: - return send_from_directory(app.config["FHOST_STORAGE_PATH"], f.sha256, mimetype = f.mime) + response = send_from_directory(app.config["FHOST_STORAGE_PATH"], f.sha256, mimetype = f.mime) + + response.headers["X-Expires"] = f.expiration + return response else: + if request.method == "POST": + abort(405) + + if "/" in path: + abort(404) + u = URL.query.get(id) if u: @@ -334,11 +486,34 @@ def get(path): def fhost(): if request.method == "POST" or request.method == "PUT": sf = None + secret = "secret" in request.form if "file" in request.files: - return store_file(request.files["file"], request.remote_addr) + try: + # Store the file with the requested expiration date + return store_file( + request.files["file"], + int(request.form["expires"]), + request.remote_addr, + secret + ) + except ValueError: + # The requested expiration date wasn't properly formed + abort(400) + except KeyError: + # No expiration date was requested, store with the max lifespan + return store_file( + request.files["file"], + None, + request.remote_addr, + secret + ) elif "url" in request.form: - return store_url(request.form["url"], request.remote_addr) + return store_url( + request.form["url"], + 
request.remote_addr, + secret + ) elif "shorten" in request.form: return shorten(request.form["shorten"]) @@ -353,6 +528,7 @@ Disallow: / """ @app.errorhandler(400) +@app.errorhandler(401) @app.errorhandler(404) @app.errorhandler(411) @app.errorhandler(413) @@ -361,6 +537,129 @@ Disallow: / @app.errorhandler(451) def ehandler(e): try: - return render_template(f"{e.code}.html", id=id), e.code + return render_template(f"{e.code}.html", id=id, request=request), e.code except TemplateNotFound: return "Segmentation fault\n", e.code + +@app.cli.command("prune") +def prune(): + """ + Clean up expired files + + Deletes any files from the filesystem which have hit their expiration time. This + doesn't remove them from the database, only from the filesystem. It's recommended + that server owners run this command regularly, or set it up on a timer. + """ + current_time = time.time() * 1000; + + # The path to where uploaded files are stored + storage = Path(app.config["FHOST_STORAGE_PATH"]) + + # A list of all files who've passed their expiration times + expired_files = File.query\ + .where( + and_( + File.expiration.is_not(None), + File.expiration < current_time + ) + ) + + files_removed = 0; + + # For every expired file... + for file in expired_files: + # Log the file we're about to remove + file_name = file.getname() + file_hash = file.sha256 + file_path = storage / file_hash + print(f"Removing expired file {file_name} [{file_hash}]") + + # Remove it from the file system + try: + os.remove(file_path) + files_removed += 1; + except FileNotFoundError: + pass # If the file was already gone, we're good + except OSError as e: + print(e) + print( + "\n------------------------------------" + "Encountered an error while trying to remove file {file_path}. 
Double" + "check to make sure the server is configured correctly, permissions are" + "okay, and everything is ship shape, then try again.") + return; + + # Finally, mark that the file was removed + file.expiration = None; + db.session.commit() + + print(f"\nDone! {files_removed} file(s) removed") + +""" For a file of a given size, determine the largest allowed lifespan of that file + +Based on the current app's configuration: Specifically, the MAX_CONTENT_LENGTH, as well +as FHOST_{MIN,MAX}_EXPIRATION. + +This lifespan may be shortened by a user's request, but no files should be allowed to +expire at a point after this number. + +Value returned is a duration in milliseconds. +""" +def get_max_lifespan(filesize: int) -> int: + min_exp = app.config.get("FHOST_MIN_EXPIRATION", 30 * 24 * 60 * 60 * 1000) + max_exp = app.config.get("FHOST_MAX_EXPIRATION", 365 * 24 * 60 * 60 * 1000) + max_size = app.config.get("MAX_CONTENT_LENGTH", 256 * 1024 * 1024) + return min_exp + int((-max_exp + min_exp) * (filesize / max_size - 1) ** 3) + +def do_vscan(f): + if f["path"].is_file(): + with open(f["path"], "rb") as scanf: + try: + f["result"] = list(app.config["VSCAN_SOCKET"].instream(scanf).values())[0] + except: + f["result"] = ("SCAN FAILED", None) + else: + f["result"] = ("FILE NOT FOUND", None) + + return f + +@app.cli.command("vscan") +def vscan(): + if not app.config["VSCAN_SOCKET"]: + print("""Error: Virus scanning enabled but no connection method specified. 
+Please set VSCAN_SOCKET.""") + sys.exit(1) + + qp = Path(app.config["VSCAN_QUARANTINE_PATH"]) + qp.mkdir(parents=True, exist_ok=True) + + from multiprocessing import Pool + with Pool() as p: + if isinstance(app.config["VSCAN_INTERVAL"], datetime.timedelta): + scandate = datetime.datetime.now() - app.config["VSCAN_INTERVAL"] + res = File.query.filter(or_(File.last_vscan < scandate, + File.last_vscan == None), + File.removed == False) + else: + res = File.query.filter(File.last_vscan == None, File.removed == False) + + work = [{"path" : f.getpath(), "name" : f.getname(), "id" : f.id} for f in res] + + results = [] + for i, r in enumerate(p.imap_unordered(do_vscan, work)): + if r["result"][0] != "OK": + print(f"{r['name']}: {r['result'][0]} {r['result'][1] or ''}") + + found = False + if r["result"][0] == "FOUND": + if not r["result"][1] in app.config["VSCAN_IGNORE"]: + r["path"].rename(qp / r["name"]) + found = True + + results.append({ + "id" : r["id"], + "last_vscan" : None if r["result"][0] == "SCAN FAILED" else datetime.datetime.now(), + "removed" : found}) + + db.session.bulk_update_mappings(File, results) + db.session.commit() diff --git a/instance/config.example.py b/instance/config.example.py new file mode 100644 index 0000000..825afcb --- /dev/null +++ b/instance/config.example.py @@ -0,0 +1,215 @@ + + + ################################################################################ + # This is a configuration file for 0x0 / The Null Pointer # + # # + # The default values here are set to generally reasonable defaults, but a # + # couple of things need your attention. Specifically, make sure you set # + # SQLALCHEMY_DATABASE_URI. You'll also probably want to configure # + # FHOST_USE_X_SENDFILE and FHOST_USE_X_ACCEL_REDIRECT to match your webserver. # + # # + # Need help, or find anything confusing? Try opening up an issue! 
# + # https://git.0x0.st/mia/0x0/issues/new # + ################################################################################ + + + +# The database URL for the database 0x0 should use +# +# See https://docs.sqlalchemy.org/en/20/core/engines.html#backend-specific-urls +# for help configuring these for your database. +# +# For small and medium servers, it's plenty sufficient to just use an sqlite +# database. In this case, the database URI you want to use is just +# +# sqlite:/// + /path/to/your/database.db +# +# Until https://git.0x0.st/mia/0x0/issues/70 is resolved, it's recommended that +# any sqlite databases use an absolute path, as relative paths aren't consistently +# resolved. +SQLALCHEMY_DATABASE_URI = 'sqlite:///' + '/path/to/database.sqlite' + + +# The maximum allowable upload size, in bytes +# +# Keep in mind that this affects the expiration of files as well! The closer a +# file is to the max content length, the less time it will last before being +# deleted. +MAX_CONTENT_LENGTH = 256 * 1024 * 1024 # Default: 256MiB + + +# The maximum length of URLs we'll shorten, in characters +# +# If a user tries to submit a URL longer than this, we'll reject their request +# with a 414 REQUEST URI TOO LONG. +MAX_URL_LENGTH = 4096 + + +# The minimum and maximum amount of time we'll retain a file for +# +# Small files (nearing zero bytes) are stored for the longest possible expiration date, +# while larger files (nearing MAX_CONTENT_LENGTH bytes) are stored for the shortest amount +# of time. Values between these two extremes are interpolated with an exponential curve, +# like the one shown on the index page. +# +# All times are in milliseconds. If you want all files to be stored for the same amount +# of time, set these to the same value. 
+FHOST_MIN_EXPIRATION = 30 * 24 * 60 * 60 * 1000 +FHOST_MAX_EXPIRATION = 365 * 24 * 60 * 60 * 1000 + + +# Use the X-SENDFILE header to speed up serving files w/ compatible webservers +# +# Some webservers can be configured to use the X-Sendfile header to handle sending +# large files on behalf of the application. If your server is set up to do +# this, set this variable to True +USE_X_SENDFILE = False + + +# Use X-Accel-Redirect to speed up serving files w/ compatible webservers +# +# Other webservers, like nginx and Caddy, use the X-Accel-Redirect header to +# accomplish a very similar thing to X-Sendfile (above). If your webserver is +# configured to do this, set this variable to True +# +# Note: It's recommended that you use either X-Sendfile or X-Accel-Redirect +# when you deploy in production. +FHOST_USE_X_ACCEL_REDIRECT = True # expect nginx by default + + +# The directory that 0x0 should store uploaded files in +# +# Whenever a file is uploaded to 0x0, we store it here! Relative paths are +# resolved relative to the working directory that 0x0 is being run from. +FHOST_STORAGE_PATH = "up" + + +# The maximum acceptable length of a user-specified file extension +# +# When a user uploads a file, in most cases, we keep the file extension they +# provide. But! If the specified file extension is longer than +# FHOST_MAX_EXT_LENGTH, we truncate it. So if a user tries to upload the file +# "myfile.withareallongext", but FHOST_MAX_EXT_LENGTH is set to 9, then the +# extension that we keep is ".witharea" (the leading dot counts) +FHOST_MAX_EXT_LENGTH = 9 + + +# The number of bytes used for "secret" URLs +# +# When a user uploads a file with the "secret" option, 0x0 generates a string +# from this many bytes of random data. It is base64-encoded, so on average +# each byte results in approximately 1.3 characters. 
+FHOST_SECRET_BYTES = 16 + +# A mapping of MIME types to file extensions, used when the uploader doesn't specify one +# +# When a user uploads a file with no file extension, we try to find an extension that +# works for that file. This configuration option is the first thing that we check. If +# the type of a file without an extension is in this dict, then the mapped value is used +# as the file extension for that file. Otherwise, we try to pick something sensible from +# libmagic's database. +# +# For example, if the user uploads "myfile" with no extension, and the file is a jpeg +# image, the file will get a URL like "eAa.jpg" +# +# For a list of MIME types you can use in this dict, check +# https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types +FHOST_EXT_OVERRIDE = { + "audio/flac" : ".flac", + "image/gif" : ".gif", + "image/jpeg" : ".jpg", + "image/png" : ".png", + "image/svg+xml" : ".svg", + "video/webm" : ".webm", + "video/x-matroska" : ".mkv", + "application/octet-stream" : ".bin", + "text/plain" : ".log", + "text/plain" : ".txt", + "text/x-diff" : ".diff", +} + + +# Control which files aren't allowed to be uploaded +# +# Certain kinds of files are never accepted. If the file claims to be one of +# these types of files, or if we look at the contents of the file and it looks +# like one of these filetypes, then we reject the file outright with a 415 +# UNSUPPORTED MEDIA TYPE +FHOST_MIME_BLACKLIST = [ + "application/x-dosexec", + "application/java-archive", + "application/java-vm" +] + + +# A list of IP addresses which are blacklisted from uploading files +# +# Can be set to the path of a file with an IP address on each line. The file +# can also include comment lines using a pound sign (#). Paths are resolved +# relative to the instance/ directory. +# +# If this is set to None, then no IP blacklist will be consulted. 
+FHOST_UPLOAD_BLACKLIST = None + + +# Enables support for detecting NSFW images +# +# Consult README.rst for additional dependencies before setting to True +NSFW_DETECT = False + + +# The cutoff for when an image is considered NSFW +# +# When the NSFW detection algorithm generates an output higher than this +# number, an image is considered to be NSFW. NSFW images aren't declined, but +# are marked as NSFW. +# +# If NSFW_DETECT is set to False, then this has no effect. +NSFW_THRESHOLD = 0.608 + + +# If you want to scan files for viruses using ClamAV, specify the socket used +# for connections here. You will need the clamd module. +# Since this can take a very long time on larger files, it is not done +# immediately but every time you run the vscan command. It is recommended to +# configure a systemd timer or cronjob to do this periodically. +# Remember to adjust your size limits in clamd.conf, including StreamMaxLength! +# +# Example: +# from clamd import ClamdUnixSocket +# VSCAN_SOCKET = ClamdUnixSocket("/run/clamav/clamd-socket") + +# This is the directory that files flagged as malicious are moved to. +# Relative paths are resolved relative to the working directory +# of the 0x0 process. +VSCAN_QUARANTINE_PATH = "quarantine" + +# Since updated virus definitions might catch some files that were previously +# reported as clean, you may want to rescan old files periodically. +# Set this to a datetime.timedelta to specify the frequency, or None to +# disable rescanning. +from datetime import timedelta +VSCAN_INTERVAL = timedelta(days=7) + +# Some files flagged by ClamAV are usually not malicious, especially if the +# DetectPUA option is enabled in clamd.conf. This is a list of signatures +# that will be ignored. +VSCAN_IGNORE = [ + "Eicar-Test-Signature", + "PUA.Win.Packer.XmMusicFile", +] + +# A list of all characters which can appear in a URL +# +# If this list is too short, then URLs can very quickly become long. 
+# Generally, the default value for this should work for basically all usecases. +URL_ALPHABET = "DEQhd2uFteibPwq0SWBInTpA_jcZL5GKz3YCR14Ulk87Jors9vNHgfaOmMXy6Vx-" + + + ################################################################################# + # CONGRATULATIONS! You made it all the way through! # + # If you want to go even further to customize your instance, try checking out # + # the templates in the templates/ directory to customize your landing page, 404 # + # page, and other error pages. # + ################################################################################# + diff --git a/migrations/versions/0659d7b9eea8_.py b/migrations/versions/0659d7b9eea8_.py new file mode 100644 index 0000000..2ef2151 --- /dev/null +++ b/migrations/versions/0659d7b9eea8_.py @@ -0,0 +1,26 @@ +"""add file management token + +Revision ID: 0659d7b9eea8 +Revises: 939a08e1d6e5 +Create Date: 2022-11-30 01:06:53.362973 + +""" + +# revision identifiers, used by Alembic. +revision = '0659d7b9eea8' +down_revision = '939a08e1d6e5' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('file', sa.Column('mgmt_token', sa.String(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('file', 'mgmt_token') + # ### end Alembic commands ### diff --git a/migrations/versions/30bfe33aa328_add_file_size_field.py b/migrations/versions/30bfe33aa328_add_file_size_field.py new file mode 100644 index 0000000..e6ac279 --- /dev/null +++ b/migrations/versions/30bfe33aa328_add_file_size_field.py @@ -0,0 +1,46 @@ +"""add file size field + +Revision ID: 30bfe33aa328 +Revises: 5cee97aab219 +Create Date: 2022-12-13 22:32:12.242394 + +""" + +# revision identifiers, used by Alembic. 
+revision = '30bfe33aa328' +down_revision = '5cee97aab219' + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.ext.automap import automap_base +from sqlalchemy.orm import Session +from flask import current_app +from pathlib import Path + +Base = automap_base() + +def upgrade(): + op.add_column('file', sa.Column('size', sa.BigInteger(), nullable=True)) + bind = op.get_bind() + Base.prepare(autoload_with=bind) + File = Base.classes.file + session = Session(bind=bind) + + storage = Path(current_app.config["FHOST_STORAGE_PATH"]) + + updates = [] + files = session.scalars(sa.select(File).where(sa.not_(File.removed))) + for f in files: + p = storage / f.sha256 + if p.is_file(): + updates.append({ + "id" : f.id, + "size" : p.stat().st_size + }) + + session.bulk_update_mappings(File, updates) + session.commit() + + +def downgrade(): + op.drop_column('file', 'size') diff --git a/migrations/versions/5cee97aab219_.py b/migrations/versions/5cee97aab219_.py new file mode 100644 index 0000000..6c1a16b --- /dev/null +++ b/migrations/versions/5cee97aab219_.py @@ -0,0 +1,26 @@ +"""add date of last virus scan + +Revision ID: 5cee97aab219 +Revises: e2e816056589 +Create Date: 2022-12-10 16:39:56.388259 + +""" + +# revision identifiers, used by Alembic. +revision = '5cee97aab219' +down_revision = 'e2e816056589' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('file', sa.Column('last_vscan', sa.DateTime(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
def upgrade():
    """Add the nullable ``last_vscan`` datetime column to the ``file`` table."""
    vscan_column = sa.Column('last_vscan', sa.DateTime(), nullable=True)
    op.add_column('file', vscan_column)


def downgrade():
    """Drop the ``last_vscan`` column from the ``file`` table."""
    op.drop_column('file', 'last_vscan')
def get_max_lifespan(filesize: int) -> int:
    """Return the longest lifespan allowed for a file of the given size.

    The value is derived from the current app's configuration, namely
    ``MAX_CONTENT_LENGTH`` and ``FHOST_{MIN,MAX}_EXPIRATION``. A user may
    request a shorter lifespan, but no file should be allowed to expire
    later than this.

    :param filesize: size of the file (callers pass ``st_size``)
    :returns: duration in milliseconds
    """
    min_exp = current_app.config.get("FHOST_MIN_EXPIRATION", 30 * 24 * 60 * 60 * 1000)
    max_exp = current_app.config.get("FHOST_MAX_EXPIRATION", 365 * 24 * 60 * 60 * 1000)
    # Flask pre-populates MAX_CONTENT_LENGTH with None, so a ``.get()``
    # default would never be used; fall back explicitly instead of
    # dividing by None below.
    max_size = current_app.config.get("MAX_CONTENT_LENGTH") or 256 * 1024 * 1024
    # Cubic falloff: an empty file gets max_exp, a file of max_size gets
    # exactly min_exp.
    return min_exp + int((-max_exp + min_exp) * (filesize / max_size - 1) ** 3)


Base = automap_base()


def upgrade():
    """Add ``file.expiration`` and back-fill it for files still in storage.

    For every row that is not marked as removed and whose content is still
    present below ``FHOST_STORAGE_PATH``, the expiration is set to the
    file's mtime plus the maximum lifespan for its size (both in ms).
    """
    op.add_column('file', sa.Column('expiration', sa.BigInteger()))

    bind = op.get_bind()
    Base.prepare(autoload_with=bind)
    File = Base.classes.file
    session = Session(bind=bind)

    storage = Path(current_app.config["FHOST_STORAGE_PATH"])

    # Names of the files which have not expired yet.
    # This could get really big for some servers, hence a set for cheap
    # membership tests.
    try:
        unexpired_files = set(os.listdir(storage))
    except FileNotFoundError:
        return  # There are no currently unexpired files

    # Filter by name in Python instead of ``sha256 IN (...)``: the IN
    # clause binds one SQL variable per stored file, which exceeds
    # SQLite's bound-parameter limit on large instances.
    files = session.scalars(sa.select(File).where(sa.not_(File.removed)))

    updates = []  # We coalesce updates to the database here
    for file in files:
        if file.sha256 not in unexpired_files:
            continue
        try:
            stat = os.stat(storage / file.sha256)
        except FileNotFoundError:
            continue  # Vanished since the directory was listed
        max_age = get_max_lifespan(stat.st_size)  # How long the file may live, in ms
        file_birth = stat.st_mtime * 1000  # When the file was last modified, in ms
        updates.append({'id': file.id, 'expiration': int(file_birth + max_age)})

    # Apply coalesced updates
    session.bulk_update_mappings(File, updates)
    session.commit()


def downgrade():
    """Drop the ``expiration`` column from the ``file`` table."""
    op.drop_column('file', 'expiration')
def upgrade():
    """Add the nullable ``secret`` string column to the ``file`` table."""
    secret_column = sa.Column('secret', sa.String(), nullable=True)
    op.add_column('file', secret_column)


def downgrade():
    """Drop the ``secret`` column from the ``file`` table."""
    op.drop_column('file', 'secret')
def detect(self, fpath):
    """Score the media file at *fpath* using one decoded video frame.

    The container is opened with PyAV and positioned at half of
    ``container.duration`` (or at the start if that fails). The first
    decoded frame of video stream 0 is scaled so that its longer side is
    256 pixels, serialised as PPM into memory and handed to
    ``self._compute``.

    :param fpath: file to analyse, passed to ``av.open``
    :returns: element 1 of the scores from ``self._compute`` (presumably
        the NSFW probability -- confirm against the model), or ``-1.0``
        if the file could not be decoded or scored
    """
    try:
        with av.open(fpath) as container:
            try:
                container.seek(int(container.duration / 2))
            except Exception:
                # Duration unavailable or seek failed: use the start.
                container.seek(0)

            frame = next(container.decode(video=0))

            # max(1, ...) keeps the short side from rounding down to 0
            # for extreme aspect ratios.
            if frame.width >= frame.height:
                w = 256
                h = max(1, int(frame.height * (256 / frame.width)))
            else:
                w = max(1, int(frame.width * (256 / frame.height)))
                h = 256
            frame = frame.reformat(width=w, height=h, format="rgb24")
            img = BytesIO()
            frame.to_image().save(img, format="ppm")

            scores = self._compute(img)
    except Exception:
        # Best effort: any decoding or scoring error means "unknown".
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        return -1.0

    return scores[1]
+ +Whenever a file that does not already exist or has expired is uploaded, +the HTTP response header includes an X-Token field. You can use this +to perform management operations on the file. + +To delete the file immediately: + curl -Ftoken=token_here -Fdelete= {{ fhost_url }}/abc.txt +To change the expiration date (see above): + curl -Ftoken=token_here -Fexpires=3 {{ fhost_url }}/abc.txt + {% set max_size = config["MAX_CONTENT_LENGTH"]|filesizeformat(True) %} Maximum file size: {{ max_size }} Not allowed: {{ config["FHOST_MIME_BLACKLIST"]|join(", ") }} @@ -25,24 +49,24 @@ FILE RETENTION PERIOD retention = min_age + (-max_age + min_age) * pow((file_size / max_size - 1), 3) days - 365 | \\ - | \\ - | \\ - | \\ - | \\ - | \\ + {{'{: 6}'.format(config.get("FHOST_MAX_EXPIRATION", 31536000000)//86400000)}} | \ + | \ + | \ + | \ + | \ + | \ | .. - | \\ - 197.5 | ----------..------------------------------------------- + | \ + {{'{: 6.1f}'.format((config.get("FHOST_MIN_EXPIRATION", 2592000000)/2 + config.get("FHOST_MAX_EXPIRATION", 31536000000)/2)/86400000)}} | ----------..------------------------------------------- | .. - | \\ + | \ | .. | ... | .. | ... | .... | ...... - 30 | .................... + {{'{: 6}'.format(config.get("FHOST_MIN_EXPIRATION", 2592000000)//86400000)}} | .................... 0{{ ((config["MAX_CONTENT_LENGTH"]/2)|filesizeformat(True)).split(" ")[0].rjust(27) }}{{ max_size.split(" ")[0].rjust(27) }} {{ max_size.split(" ")[1].rjust(54) }} diff --git a/tests/test_client.py b/tests/test_client.py index 80de781..48a391a 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -56,8 +56,6 @@ def test_client_post(client): ]), (302, [ "E", - "E/test", - "E/test.bin", ]), (404, [ "test.bin", @@ -67,6 +65,8 @@ def test_client_post(client): "test/test", "test.bin/test.py", "E.bin", + "E/test", + "E/test.bin", ]), (451, [ "Q.truncate",