From 67063ed439dd0843536f27e8cde40a8a7d69f37b Mon Sep 17 00:00:00 2001
From: Arne Brasseur <arne@arnebrasseur.net>
Date: Mon, 27 Mar 2023 15:36:42 -0400
Subject: [PATCH] Treat a backslash in the authority section as part of the
 path

Mimic browsers (for this specific case) by treating a backslash as if it were a
forward slash, thus ending the authority section and starting the path section.
The backslash itself is not rewritten; it is kept as-is at the start of the
parsed path. This way our interpretation of a given URI's authority matches
that of browsers and many other popular libraries.

Addresses CVE-2023-28628
---
 src/lambdaisland/uri.cljc       |  4 ++--
 test/lambdaisland/uri_test.cljc | 30 +++++++++++++++++++++++++-----
 2 files changed, 27 insertions(+), 7 deletions(-)

Index: lambdaisland-uri-clojure-1.13.95/src/lambdaisland/uri.cljc
===================================================================
--- lambdaisland-uri-clojure-1.13.95.orig/src/lambdaisland/uri.cljc
+++ lambdaisland-uri-clojure-1.13.95/src/lambdaisland/uri.cljc
@@ -4,8 +4,8 @@
             [lambdaisland.uri.normalize :as normalize])
   #?(:clj (:import clojure.lang.IFn)))
 
-(def uri-regex #?(:clj #"\A(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)?(\?([^#]*))?(#(.*))?\z"
-                  :cljs #"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)?(\?([^#]*))?(#(.*))?$"))
+(def uri-regex #?(:clj #"\A(([^:/?#]+):)?(//([^/?#\\]*))?([^?#]*)?(\?([^#]*))?(#(.*))?\z"
+                  :cljs #"^(([^:/?#]+):)?(//([^/?#\\]*))?([^?#]*)?(\?([^#]*))?(#(.*))?$"))
 (def authority-regex #?(:clj #"\A(([^:]*)(:(.*))?@)?([^:]*)(:(\d*))?\z"
                         :cljs #"^(([^:]*)(:(.*))?@)?([^:]*)(:(\d*))?$"))
 
Index: lambdaisland-uri-clojure-1.13.95/test/lambdaisland/uri_test.cljc
===================================================================
--- lambdaisland-uri-clojure-1.13.95.orig/test/lambdaisland/uri_test.cljc
+++ lambdaisland-uri-clojure-1.13.95/test/lambdaisland/uri_test.cljc
@@ -179,8 +179,28 @@
 
 (tc/defspec query-string-round-trips 100
   (prop/for-all [q query-map-gen]
-    (let [res (-> q
-                  uri/map->query-string
-                  uri/query-string->map)]
-      (or (and (empty? q) (empty? res)) ;; (= nil {})
-          (= q res)))))
+                (let [res (-> q
+                              uri/map->query-string
+                              uri/query-string->map)]
+                  (or (and (empty? q) (empty? res)) ;; (= nil {})
+                      (= q res)))))
+
+(deftest backslash-in-authority-test
+  ;; A backslash is not technically a valid character in a URI (see RFC 3986
+  ;; section 2), and so should always be percent encoded. The problem is that
+  ;; user-facing software (e.g. browsers) rarely if ever rejects invalid
+  ;; URIs/URLs, leading to ad-hoc rules about how to map the set of invalid URIs
+  ;; to valid URIs. All modern browsers now interpret a backslash as a forward
+  ;; slash, which changes the interpretation of the URI. For this test (and
+  ;; accompanying patch) we only care about the specific case of a backslash
+  ;; appearing inside the authority section, since this authority or _origin_ is
+  ;; regularly used to inform security policies, e.g. to check if code served
+  ;; from a certain origin has access to resources with the same origin. In this
+  ;; case we partially mimic what browsers do, by treating the backslash as a
+  ;; delimiter which starts the path section, even though we don't replace it
+  ;; with a forward slash, but leave it as-is in the parsed result.
+  (let [{:keys [host path user]}
+        (uri/uri "https://example.com\\@gaiwan.co")]
+    (is (= "example.com" host))
+    (is (= nil user))
+    (is (= "\\@gaiwan.co" path))))
