diff --git a/.gitignore b/.gitignore index 6fd0ccd..3ffedb3 100644 --- a/.gitignore +++ b/.gitignore @@ -19,4 +19,5 @@ composer.lock # Ignore test data & output coverage tests/datastore.php +tests/dummycache/ phpmd.html diff --git a/application/Url.php b/application/Url.php new file mode 100644 index 0000000..23356f3 --- /dev/null +++ b/application/Url.php @@ -0,0 +1,150 @@ +parts = parse_url($url); + } + + /** + * Returns a string representation of this URL + */ + public function __toString() + { + return unparse_url($this->parts); + } + + /** + * Removes undesired query parameters + */ + protected function cleanupQuery() + { + if (! isset($this->parts['query'])) { + return; + } + + $queryParams = explode('&', $this->parts['query']); + + foreach (self::$annoyingQueryParams as $annoying) { + foreach ($queryParams as $param) { + if (startsWith($param, $annoying)) { + $queryParams = array_diff($queryParams, array($param)); + continue; + } + } + } + + if (count($queryParams) == 0) { + unset($this->parts['query']); + return; + } + + $this->parts['query'] = implode('&', $queryParams); + } + + /** + * Removes undesired fragments + */ + protected function cleanupFragment() + { + if (! isset($this->parts['fragment'])) { + return; + } + + foreach (self::$annoyingFragments as $annoying) { + if (startsWith($this->parts['fragment'], $annoying)) { + unset($this->parts['fragment']); + break; + } + } + } + + /** + * Removes undesired query parameters and fragments + * + * @return string the string representation of this URL after cleanup + */ + public function cleanup() + { + $this->cleanupQuery(); + $this->cleanupFragment(); + return $this->__toString(); + } +} diff --git a/index.php b/index.php index 84b8f01..74f9549 100755 --- a/index.php +++ b/index.php @@ -74,6 +74,7 @@ require_once 'application/Cache.php'; require_once 'application/CachedPage.php'; require_once 'application/LinkDB.php'; require_once 'application/TimeZone.php'; +require_once 'application/Url.php'; require_once 'application/Utils.php'; require_once 'application/Config.php'; @@ -1479,29 +1480,9 @@ function renderPage() } // -------- User want to post a new link: Display link edit form. - if (isset($_GET['post'])) - { - $url=$_GET['post']; - - // We remove the annoying parameters added by FeedBurner, GoogleFeedProxy, Facebook... - $annoyingpatterns = array('/[\?&]utm_source=[^&]*/', - '/[\?&]utm_campaign=[^&]*/', - '/[\?&]utm_medium=[^&]*/', - '/#xtor=RSS-[^&]*/', - '/[\?&]fb_[^&]*/', - '/[\?&]__scoop[^&]*/', - '/#tk\.rss_all\?/', - '/[\?&]action_ref_map=[^&]*/', - '/[\?&]action_type_map=[^&]*/', - '/[\?&]action_object_map=[^&]*/', - '/[\?&]utm_content=[^&]*/', - '/[\?&]fb=[^&]*/', - '/[\?&]xtor=[^&]*/' - ); - foreach($annoyingpatterns as $pattern) - { - $url = preg_replace($pattern, "", $url); - } + if (isset($_GET['post'])) { + $url = new Url($_GET['post']); + $url->cleanup(); $link_is_new = false; $link = $LINKSDB->getLinkFromUrl($url); // Check if URL is not already in database (in this case, we will edit the existing link) diff --git a/tests/UrlTest.php b/tests/UrlTest.php new file mode 100644 index 0000000..a39630f --- /dev/null +++ b/tests/UrlTest.php @@ -0,0 +1,154 @@ +assertEquals('', unparse_url(array())); + } + + /** + * Rebuild a full-featured URL + */ + public function testUnparseFull() + { + $ref = 'http://username:password@hostname:9090/path' + .'?arg1=value1&arg2=value2#anchor'; + $this->assertEquals($ref, unparse_url(parse_url($ref))); + } +} + +/** + * Unitary tests for URL utilities + */ +class UrlTest extends PHPUnit_Framework_TestCase +{ + // base URL for tests + protected static $baseUrl = 'http://domain.tld:3000'; + + /** + * Helper method + */ + private function assertUrlIsCleaned($query='', $fragment='') + { + $url = new Url(self::$baseUrl.$query.$fragment); + $url->cleanup(); + $this->assertEquals(self::$baseUrl, $url->__toString()); + } + + /** + * Instantiate an empty URL + */ + public function testEmptyConstruct() + { + $this->assertEquals('', new Url('')); + } + + /** + * Instantiate a URL + */ + public function testConstruct() + { + $ref = 'http://username:password@hostname:9090/path' + .'?arg1=value1&arg2=value2#anchor'; + $this->assertEquals($ref, new Url($ref)); + } + + /** + * URL cleanup - nothing to do + */ + public function testNoCleanup() + { + // URL with no query nor fragment + $this->assertUrlIsCleaned(); + + // URL with no annoying elements + $ref = self::$baseUrl.'?p1=val1&p2=1234#edit'; + $url = new Url($ref); + $this->assertEquals($ref, $url->cleanup()); + } + + /** + * URL cleanup - annoying fragment + */ + public function testCleanupFragment() + { + $this->assertUrlIsCleaned('', '#tk.rss_all'); + $this->assertUrlIsCleaned('', '#xtor=RSS-'); + $this->assertUrlIsCleaned('', '#xtor=RSS-U3ht0tkc4b'); + } + + /** + * URL cleanup - single annoying query parameter + */ + public function testCleanupSingleQueryParam() + { + $this->assertUrlIsCleaned('?action_object_map=junk'); + $this->assertUrlIsCleaned('?action_ref_map=Cr4p!'); + $this->assertUrlIsCleaned('?action_type_map=g4R84g3'); + + $this->assertUrlIsCleaned('?fb_stuff=v41u3'); + $this->assertUrlIsCleaned('?fb=71m3w4573'); + + $this->assertUrlIsCleaned('?utm_campaign=zomg'); + $this->assertUrlIsCleaned('?utm_medium=numnum'); + $this->assertUrlIsCleaned('?utm_source=c0d3'); + $this->assertUrlIsCleaned('?utm_term=1n4l'); + + $this->assertUrlIsCleaned('?xtor=some-url'); + } + + /** + * URL cleanup - multiple annoying query parameters + */ + public function testCleanupMultipleQueryParams() + { + $this->assertUrlIsCleaned('?xtor=some-url&fb=som3th1ng'); + $this->assertUrlIsCleaned( + '?fb=stuff&utm_campaign=zomg&utm_medium=numnum&utm_source=c0d3' + ); + } + + /** + * URL cleanup - multiple annoying query parameters, annoying fragment + */ + public function testCleanupMultipleQueryParamsAndFragment() + { + $this->assertUrlIsCleaned('?xtor=some-url&fb=som3th1ng', '#tk.rss_all'); + } + + /** + * Nominal case - the URL contains both useful and annoying parameters + */ + public function testCleanupMixedContent() + { + // ditch annoying query params and fragment, keep useful params + $url = new Url( + self::$baseUrl + .'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#tk.rss_all' + ); + $this->assertEquals(self::$baseUrl.'?my=stuff&is=kept', $url->cleanup()); + + + // ditch annoying query params, keep useful params and fragment + $url = new Url( + self::$baseUrl + .'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#again' + ); + $this->assertEquals( + self::$baseUrl.'?my=stuff&is=kept#again', + $url->cleanup() + ); + } +}