What are path traversal attacks? How to prevent them?

Path traversal attacks may cause your sensitive data to be leaked. This attack relies on adding "../" or similar sequences to a path, so that the application accesses files outside its specified root directory.

WARNING: We’re not responsible for damage caused by path traversal attacks! Malicious hacking is a computer crime and you may face legal consequences! This post is meant to raise awareness about path traversal attacks and to show a way to prevent these vulnerabilities.

The impact of path traversal attacks

Path traversal attacks may cause leakage of:

Application code and data
Credentials for the application
Sensitive operating system files
Other sensitive data

In some cases, an attacker might be able to write to arbitrary files, which may lead to data loss, broken web applications, and even the attacker seizing control of the server.

Example: a static web server written in JavaScript

Let's take the vulnerable server from our tutorial for a static web server running on Node.js:

//WARNING!!! PATH TRAVERSAL
// Deliberately vulnerable example: the raw request URL is appended to "."
// and handed to fs.readFile without any sanitization.
var http = require("http");
var fs = require("fs");
var port = 8080;

// Sends a plain-text response with the given status line and body.
function sendPlainText(res, statusCode, statusMessage, body) {
  res.writeHead(statusCode, statusMessage, {
    "Content-Type": "text/plain"
  });
  res.end(body);
}

// Maps the request URL onto a path relative to the working directory and
// serves that file as HTML; "/" is served from index.html.
function handleRequest(req, res) {
  var requestedPath = "." + req.url;
  var filename = requestedPath == "./" ? "./index.html" : requestedPath;

  fs.readFile(filename, function (err, data) {
    if (!err) {
      res.writeHead(200, "OK", {
        "Content-Type": "text/html"
      });
      res.end(data);
      return;
    }

    //ENOENT means "File doesn't exist"
    if (err.code == "ENOENT") {
      sendPlainText(res, 404, "Not Found", "404 Not Found");
      return;
    }

    // Any other read failure is reported as a server error.
    sendPlainText(res, 500, "Internal Server Error", "500 Internal Server Error! Reason: " + err.message);
  });
}

var server = http.createServer(handleRequest);

server.listen(port, function () {
  console.log("Started server at port " + port + ".");
});

Let's assume the web root is /home/user/server/. If the request URL is /robots.txt, then the web server reads the /home/user/server/robots.txt file and returns its contents.

But what if the request path was /../../../../../../../../etc/passwd? In this case, the web server goes up to the parent directory multiple times and reads the /etc/passwd file, which is outside the web root. The contents of the /etc/passwd file are leaked!

Sometimes, there is protection that works on ../ sequences but fails with one of these sequences:

..\
..%2f (represents ../)
%2e%2e%2f (represents ../)
%2e%2e/ (represents ../)
..%5c (represents ..\)
%2e%2e%5c (represents ..\)
%2e%2e\ (represents ..\)
%252e%252e%255c (double URL encoding; represents ..\)
..%255c (double URL encoding; represents ..\)
..%c0%af (represents ../)
..%c1%9c (represents ..\)
....// (nested path traversal sequence)
....\\ (nested path traversal sequence)

and so on. The \ character is a path component separator in Windows (in many other OSes, like GNU/Linux, the separator is /).

Path traversal attack prevention

You can prevent path traversal attacks by using a path sanitizer, by normalizing paths, or by simply removing path traversal sequences. It is also important that the path sanitization bypass sequences shown above are removed as well. Ensure that URLs with double URL encoding are not decoded back to ../ or ..\ sequences.

The fixed server from the example above is as follows:

// Static file server that serves files relative to the current working
// directory (the web root) and neutralizes path traversal attempts.
var http = require("http");
var fs = require("fs");
var mime = require("mime-types");
var path = require("path");
var os = require("os");
var port = 8080;
var server = http.createServer(function (req, res) {
  var filename = "";
  try {
    // Both URL parsing and percent-decoding can throw on hostile input
    // (e.g. a request target of "//" or a path containing "%c0%af"),
    // so both are covered by the same guard.
    var urlObject = new URL(req.url, "http://localhost");
    filename = "." + decodeURIComponent(urlObject.pathname);
  } catch(ex) {
    //Malformed URI means bad request.
    res.writeHead(400, "Bad Request", {
      "Content-Type": "text/plain"
    });
    res.end("400 Bad Request");
    return;
  }
  //Poor mans URL sanitizer: backslashes become slashes, null bytes and "%00"
  //are removed, "/." and "/.." segments are dropped, duplicate slashes are collapsed.
  filename = filename.replace(/\\/g,"/").replace(/\0|%00/g,"").replace(/\/\.\.?(?=\/|$)/g,"/").replace(/\/+/g,"/");
  if(filename == "./") filename = "./index.html";
  var ext = path.extname(filename).slice(1); //path.extname gives "." character, so we're using slice(1) method.

  //Prevent leakage of server source code (case-insensitively on Windows)
  var sourceName = "./" + path.basename(__filename);
  var isServerSource = filename == sourceName || (os.platform() == "win32" && filename.toLowerCase() == sourceName.toLowerCase());

  //Defense in depth: even after sanitization, refuse any path that resolves
  //outside the web root. This is a lexical check only; symbolic links inside
  //the web root are not resolved here.
  var webRoot = path.resolve(".");
  var webRootPrefix = webRoot.endsWith(path.sep) ? webRoot : webRoot + path.sep;
  var isOutsideRoot = !path.resolve(filename).startsWith(webRootPrefix);

  if(isServerSource || isOutsideRoot) {
    res.writeHead(403, "Forbidden", {
      "Content-Type": "text/plain"
    });
    res.end("403 Forbidden");
    return;
  }
  fs.readFile(filename, function(err, data) {
    if(err) {
      if(err.code == "ENOENT") {
        //ENOENT means "File doesn't exist"
        res.writeHead(404, "Not Found", {
          "Content-Type": "text/plain"
        });
        res.end("404 Not Found");
      } else {
        res.writeHead(500, "Internal Server Error", {
          "Content-Type": "text/plain"
        });
        res.end("500 Internal Server Error! Reason: " + err.message);
      }
    } else {
      //mime.lookup returns false for unknown types; a header value of
      //undefined makes writeHead throw, so fall back to a generic binary type.
      res.writeHead(200, "OK", {
        "Content-Type": mime.lookup(ext) || "application/octet-stream"
      });
      res.end(data);
    }
  });
});
server.listen(port, function() {
  console.log("Started server at port " + port + ".");
});

The fixed server will first replace every "\" character with "/". Then it will remove "%00" sequences and null bytes. Next, it will remove "../" and "./" sequences. Finally, it collapses duplicate slashes into a single slash.

Let's try /../../../../../../../../etc/passwd sequence! First, the path name is obtained from the request URL. Then the path name gets decoded to /../../../../../../../../etc/passwd. Later the path name is sanitized to /etc/passwd. The server will later try to access /home/user/server/etc/passwd file; at least it is not accessing /etc/passwd file outside the web root.

What about bypasses? Let's try /..%c0%af..%c0%af..%c0%af..%c0%af..%c0%af..%c0%af..%c0%af..%c0%afetc/passwd! The path name is obtained from the request URL, but the decoding fails, because the decodeURIComponent JavaScript function throws a "URI malformed" error (%c0%af is not valid UTF-8). The error is then caught, and the server responds with the 400 Bad Request HTTP status code.

You can use a path sanitizer (the example is written in JavaScript and used by SVR.JS itself; view the source), as shown below:

// SVR.JS path sanitizer function
/**
 * Sanitizes a request path so that it cannot escape the web root.
 *
 * The function removes null bytes, rejects malformed percent-escapes, decodes
 * percent-escapes of printable ASCII characters (except a reserved set that
 * stays encoded), percent-encodes a set of unsafe literal characters,
 * converts backslashes to slashes, collapses duplicate slashes, and resolves
 * or strips "." and ".." segments.
 *
 * @param {string} resource - The raw request path. "*" and "" are returned unchanged.
 * @param {boolean} [allowDoubleSlashes] - When truthy, only runs of three or
 *   more slashes are collapsed, so "//" is preserved.
 * @returns {string} The sanitized path; "/" if nothing is left after sanitization.
 * @throws {URIError} If the path contains a malformed or disallowed
 *   percent-escape (e.g. "%c0%af", "%u002f" or "%as").
 */
function sanitizeURL(resource, allowDoubleSlashes) {
  if (resource === "*" || resource === "") return resource;
  // Remove null characters
  resource = resource.replace(/%00|\0/g, "");
  // Check if URL is malformed (e.g. %c0%af or %u002f or simply %as)
  if (resource.match(/%(?:c[01]|f[ef]|(?![0-9a-f]{2}).{2}|.{0,1}$)/i)) throw new URIError("URI malformed");
  // Decode URL-encoded characters while preserving certain characters.
  // "%25" stays encoded, so double-encoded sequences are never decoded twice.
  resource = resource.replace(/%([0-9a-f]{2})/gi, function (match, hex) {
    var decodedChar = String.fromCharCode(parseInt(hex, 16));
    return /(?!["<>^'{|}?#%])[!-~]/.test(decodedChar) ? decodedChar : "%" + hex;
  });
  // Encode certain characters. The character class must not be followed by a
  // literal "]", otherwise only pairs such as "<]" would be matched.
  resource = resource.replace(/[<>^'{|}]/g, function (character) {
    var charCode = character.charCodeAt(0);
    return "%" + (charCode < 16 ? "0" : "") + charCode.toString(16).toUpperCase();
  });
  var sanitizedResource = resource;
  // Ensure the resource starts with a slash
  if (resource[0] !== "/") sanitizedResource = "/" + sanitizedResource;
  // Convert backslashes to slashes and handle duplicate slashes
  sanitizedResource = sanitizedResource.replace(/\\/g, "/").replace(allowDoubleSlashes ? /\/{3,}/g : /\/+/g, "/");
  // Handle relative navigation (e.g., "/./", "/../", "../", "./"), also remove trailing dots in paths
  sanitizedResource = sanitizedResource.replace(/\/\.(?:\.{2,})?(?=\/|$)/g, "").replace(/([^.\/])\.+(?=\/|$)/g, "$1");
  // Repeatedly cancel each "/segment/.." pair until none remain
  while (sanitizedResource.match(/\/(?!\.\.\/)[^\/]+\/\.\.(?=\/|$)/)) {
    sanitizedResource = sanitizedResource.replace(/\/(?!\.\.\/)[^\/]+\/\.\.(?=\/|$)/g, "");
  }
  // Drop any leftover "/.." segments that would climb above the root
  sanitizedResource = sanitizedResource.replace(/\/\.\.(?=\/|$)/g, "");
  if (sanitizedResource.length === 0) return "/";
  return sanitizedResource;
}