From ef591e7ee21435da9314c5f7f6ea983c6f423898 Mon Sep 17 00:00:00 2001 From: Guillaume Virlet Date: Wed, 2 Sep 2015 13:55:39 +0200 Subject: [PATCH] Url: introduce global helper functions for cleanup and scheme detection Relates to #314 & #326 Additions: - add global `cleanup_url()` and `get_url_scheme()` functions Modifications: - replace `Url` usage in `index.php` by calls to global functions - fix `Url` tests not being run: PHPUnit expects a single test class per file - move classes to separate files --- application/LinkDB.php | 4 ++ application/Url.php | 30 +++++++++++++- index.php | 9 ++-- tests/Url/CleanupUrlTest.php | 76 ++++++++++++++++++++++++++++++++++ tests/Url/GetUrlSchemeTest.php | 31 ++++++++++++++ tests/Url/UnparseUrlTest.php | 31 ++++++++++++++ tests/{ => Url}/UrlTest.php | 32 +++----------- 7 files changed, 179 insertions(+), 34 deletions(-) create mode 100644 tests/Url/CleanupUrlTest.php create mode 100644 tests/Url/GetUrlSchemeTest.php create mode 100644 tests/Url/UnparseUrlTest.php rename tests/{ => Url}/UrlTest.php (85%) diff --git a/application/LinkDB.php b/application/LinkDB.php index 463aa47e..84733505 100644 --- a/application/LinkDB.php +++ b/application/LinkDB.php @@ -287,6 +287,10 @@ public function savedb($pageCacheDir) /** * Returns the link for a given URL, or False if it does not exist. + * + * @param string $url URL to search for + * + * @return mixed the existing link if it exists, else 'false' */ public function getLinkFromUrl($url) { diff --git a/application/Url.php b/application/Url.php index 02a4395d..af43b457 100755 --- a/application/Url.php +++ b/application/Url.php @@ -25,6 +25,32 @@ function unparse_url($parsedUrl) return "$scheme$user$pass$host$port$path$query$fragment"; } +/** + * Removes undesired query parameters and fragments + * + * @param string url Url to be cleaned + * + * @return string the string representation of this URL after cleanup + */ +function cleanup_url($url) +{ + $obj_url = new Url($url); + return $obj_url->cleanup(); +} + +/** + * Get URL scheme. + * + * @param string url Url for which the scheme is requested + * + * @return mixed the URL scheme or false if none is provided. + */ +function get_url_scheme($url) +{ + $obj_url = new Url($url); + return $obj_url->getScheme(); +} + /** * URL representation and cleanup utilities * @@ -90,7 +116,7 @@ public function __construct($url) /** * Returns a string representation of this URL */ - public function __toString() + public function toString() { return unparse_url($this->parts); } @@ -149,7 +175,7 @@ public function cleanup() { $this->cleanupQuery(); $this->cleanupFragment(); - return $this->__toString(); + return $this->toString(); } /** diff --git a/index.php b/index.php index e39cff38..61d92f04 100755 --- a/index.php +++ b/index.php @@ -1454,12 +1454,11 @@ function renderPage() // -------- User want to post a new link: Display link edit form. if (isset($_GET['post'])) { - $url = new Url($_GET['post']); - $url->cleanup(); + $url = cleanup_url($_GET['post']); $link_is_new = false; // Check if URL is not already in database (in this case, we will edit the existing link) - $link = $LINKSDB->getLinkFromUrl((string)$url); + $link = $LINKSDB->getLinkFromUrl($url); if (!$link) { $link_is_new = true; @@ -1471,7 +1470,7 @@ function renderPage() $tags = (empty($_GET['tags']) ? '' : $_GET['tags'] ); $private = (!empty($_GET['private']) && $_GET['private'] === "1" ? 1 : 0); // If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.) - if (empty($title) && strpos($url->getScheme(), 'http') !== false) { + if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) { // Short timeout to keep the application responsive list($headers, $data) = get_http_url($url, 4); // FIXME: Decode charset according to specified in either 1) HTTP response headers or 2) in html @@ -1505,7 +1504,7 @@ function renderPage() $link = array( 'linkdate' => $linkdate, 'title' => $title, - 'url' => (string)$url, + 'url' => $url, 'description' => $description, 'tags' => $tags, 'private' => $private diff --git a/tests/Url/CleanupUrlTest.php b/tests/Url/CleanupUrlTest.php new file mode 100644 index 00000000..ba9a0437 --- /dev/null +++ b/tests/Url/CleanupUrlTest.php @@ -0,0 +1,76 @@ +assertEquals('', cleanup_url('')); + } + + /** + * Clean an already cleaned Url + */ + public function testCleanupUrlAlreadyClean() + { + $ref = 'http://domain.tld:3000'; + $this->assertEquals($ref, cleanup_url($ref)); + $ref = $ref.'/path/to/dir/'; + $this->assertEquals($ref, cleanup_url($ref)); + } + + /** + * Clean Url needing cleaning + */ + public function testCleanupUrlNeedClean() + { + $ref = 'http://domain.tld:3000'; + $this->assertEquals($ref, cleanup_url($ref.'#tk.rss_all')); + $this->assertEquals($ref, cleanup_url($ref.'#xtor=RSS-')); + $this->assertEquals($ref, cleanup_url($ref.'#xtor=RSS-U3ht0tkc4b')); + $this->assertEquals($ref, cleanup_url($ref.'?action_object_map=junk')); + $this->assertEquals($ref, cleanup_url($ref.'?action_ref_map=Cr4p!')); + $this->assertEquals($ref, cleanup_url($ref.'?action_type_map=g4R84g3')); + + $this->assertEquals($ref, cleanup_url($ref.'?fb_stuff=v41u3')); + $this->assertEquals($ref, cleanup_url($ref.'?fb=71m3w4573')); + + $this->assertEquals($ref, cleanup_url($ref.'?utm_campaign=zomg')); + $this->assertEquals($ref, cleanup_url($ref.'?utm_medium=numnum')); + $this->assertEquals($ref, cleanup_url($ref.'?utm_source=c0d3')); + $this->assertEquals($ref, cleanup_url($ref.'?utm_term=1n4l')); + + $this->assertEquals($ref, cleanup_url($ref.'?xtor=some-url')); + $this->assertEquals($ref, cleanup_url($ref.'?xtor=some-url&fb=som3th1ng')); + $this->assertEquals($ref, cleanup_url( + $ref.'?fb=stuff&utm_campaign=zomg&utm_medium=numnum&utm_source=c0d3' + )); + $this->assertEquals($ref, cleanup_url( + $ref.'?xtor=some-url&fb=som3th1ng#tk.rss_all' + )); + + // ditch annoying query params and fragment, keep useful params + $this->assertEquals( + $ref.'?my=stuff&is=kept', + cleanup_url( + $ref.'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#tk.rss_all' + ) + ); + + // ditch annoying query params, keep useful params and fragment + $this->assertEquals( + $ref.'?my=stuff&is=kept#again', + cleanup_url( + $ref.'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#again' + ) + ); + } +} + diff --git a/tests/Url/GetUrlSchemeTest.php b/tests/Url/GetUrlSchemeTest.php new file mode 100644 index 00000000..72d80b30 --- /dev/null +++ b/tests/Url/GetUrlSchemeTest.php @@ -0,0 +1,31 @@ +assertEquals('', get_url_scheme('')); + } + + /** + * Get normal scheme of Url + */ + public function testGetUrlScheme() + { + $this->assertEquals('http', get_url_scheme('http://domain.tld:3000')); + $this->assertEquals('https', get_url_scheme('https://domain.tld:3000')); + $this->assertEquals('http', get_url_scheme('domain.tld')); + $this->assertEquals('ssh', get_url_scheme('ssh://domain.tld')); + $this->assertEquals('ftp', get_url_scheme('ftp://domain.tld')); + $this->assertEquals('git', get_url_scheme('git://domain.tld/push?pull=clone#checkout')); + } +} + diff --git a/tests/Url/UnparseUrlTest.php b/tests/Url/UnparseUrlTest.php new file mode 100644 index 00000000..edde73e4 --- /dev/null +++ b/tests/Url/UnparseUrlTest.php @@ -0,0 +1,31 @@ +assertEquals('', unparse_url(array())); + } + + /** + * Rebuild a full-featured URL + */ + public function testUnparseFull() + { + $ref = 'http://username:password@hostname:9090/path' + .'?arg1=value1&arg2=value2#anchor'; + $this->assertEquals($ref, unparse_url(parse_url($ref))); + } +} + diff --git a/tests/UrlTest.php b/tests/Url/UrlTest.php similarity index 85% rename from tests/UrlTest.php rename to tests/Url/UrlTest.php index c848e88e..e498d79e 100755 --- a/tests/UrlTest.php +++ b/tests/Url/UrlTest.php @@ -5,30 +5,6 @@ require_once 'application/Url.php'; -/** - * Unitary tests for unparse_url() - */ -class UnparseUrlTest extends PHPUnit_Framework_TestCase -{ - /** - * Thanks for building nothing - */ - public function testUnparseEmptyArray() - { - $this->assertEquals('', unparse_url(array())); - } - - /** - * Rebuild a full-featured URL - */ - public function testUnparseFull() - { - $ref = 'http://username:password@hostname:9090/path' - .'?arg1=value1&arg2=value2#anchor'; - $this->assertEquals($ref, unparse_url(parse_url($ref))); - } -} - /** * Unitary tests for URL utilities */ @@ -44,7 +20,7 @@ private function assertUrlIsCleaned($query='', $fragment='') { $url = new Url(self::$baseUrl.$query.$fragment); $url->cleanup(); - $this->assertEquals(self::$baseUrl, $url->__toString()); + $this->assertEquals(self::$baseUrl, $url->toString()); } /** @@ -52,7 +28,8 @@ private function assertUrlIsCleaned($query='', $fragment='') */ public function testEmptyConstruct() { - $this->assertEquals('', new Url('')); + $url = new Url(''); + $this->assertEquals('', $url->toString()); } /** @@ -62,7 +39,8 @@ public function testConstruct() { $ref = 'http://username:password@hostname:9090/path' .'?arg1=value1&arg2=value2#anchor'; - $this->assertEquals($ref, new Url($ref)); + $url = new Url($ref); + $this->assertEquals($ref, $url->toString()); } /**