From 74d1e595e96ba6f8a0f1d7379111f70d3afae857 Mon Sep 17 00:00:00 2001
From: Adnan Issadeen <eddyrox1@hotmail.com>
Date: Wed, 23 Sep 2015 17:39:45 +0530
Subject: [PATCH] Made url matching better! Addresses #4

The URL matcher can now take a URL in any of the following formats:

* https://domain.com
* http://domain.com
* http://www.domain.com
* http://www.sub.domain.com
* http://sub.domain.com
* http://sub.domain.com/

and a few more, and match it to the key

'domain.com'

It does this by first removing the protocol part of the URL.

It then strips the leading "www." part if it exists.

It proceeds to remove the first slash and everything after it (the
trailing slash, along with any path), leaving only the host.

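It then checks whether the host obtained so far can be found in the
dictionary, before reducing it any further. This is done so that we can
match a subdomain only, instead of always matching the full root domain.
An example would be a company blog that resides on a blogging service:
the blogging service shouldn't be penalised, but
<domain>.bloggingservice.com should.

If the host is not found, the leading subdomains are stripped until only
the last two labels remain (e.g. 'domain.com'), and that is looked up in
the dictionary instead.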
---
 core/matcher.js          | 25 ++++++++++++++++++++++++-
 core/spec/MatcherSpec.js | 29 +++++++++++++++++++----------
 2 files changed, 43 insertions(+), 11 deletions(-)
diff --git a/core/matcher.js b/core/matcher.js
index 702436f..8a6bedd 100644
--- a/core/matcher.js
+++ b/core/matcher.js
@@ -2,10 +2,33 @@ function doesUrlMatchBlackList(urlToMatch, dictionary){
     if(urlToMatch.indexOf('http') == -1){
         return false;
     }
+    urlToMatch = urlToMatch.slice(urlToMatch.indexOf('://')+3);
+    if (urlToMatch.indexOf('www.') === 0){
+        urlToMatch = urlToMatch.slice(urlToMatch.indexOf('www.') + 4);
+    }
+    if (urlToMatch.indexOf('/') !== -1){
+        urlToMatch = urlToMatch.slice(0, urlToMatch.indexOf('/'));
+    }
+    console.log(urlToMatch);
+    if(dictionary.urls[urlToMatch]){
+        return true;
+    }
+    while ( true ){
+        if(urlToMatch.indexOf('.') === -1){
+            break;
+        }
+        if (urlToMatch.indexOf('.') !== urlToMatch.lastIndexOf('.')){
+            urlToMatch = urlToMatch.slice(urlToMatch.indexOf('.')+1);
+        }
+        if(urlToMatch.indexOf('.') === urlToMatch.lastIndexOf('.')){
+            break;
+        }
+    }
+    console.log(urlToMatch);
     if(dictionary.urls[urlToMatch]){
         return true;
     }
-    
+    return false;
 }
 
 module.exports.doesUrlMatchBlackList = doesUrlMatchBlackList;
diff --git a/core/spec/MatcherSpec.js b/core/spec/MatcherSpec.js
index d97af46..05a80e7 100644
--- a/core/spec/MatcherSpec.js
+++ b/core/spec/MatcherSpec.js
@@ -1,17 +1,18 @@
 describe("Matcher", function(){
     matcher = require('../matcher.js');
     var dictionary = {urls:
-        {"https://www.heroku.com/": {
-            "isDirect": false,
-            "parent": "https://salesforce.com/",
-            "source": "",
-            "description": "",
+        {
+            "heroku.com": {
+                "isDirect": false,
+                "parent": "https://salesforce.com/",
+                "source": "",
+                "description": "",
             },
-        "https://www.salesforce.com/": {
-            "isDirect": true,
-            "parent": "",
-            "source": "",
-            "description": "",
+            "salesforce.com": {
+                "isDirect": true,
+                "parent": "",
+                "source": "",
+                "description": "",
             }
         }
     };
@@ -23,4 +24,12 @@ describe("Matcher", function(){
         expect(matcher.doesUrlMatchBlackList('https://www.heroku.com/', dictionary)).toBe(true);
         expect(matcher.doesUrlMatchBlackList('https://www.salesforce.com/', dictionary)).toBe(true);
     });
+    it("should return true without www", function(){
+        expect(matcher.doesUrlMatchBlackList('https://heroku.com/', dictionary)).toBe(true);
+        expect(matcher.doesUrlMatchBlackList('https://salesforce.com/', dictionary)).toBe(true);
+    });
+    it("should return true for subdomains", function(){
+        expect(matcher.doesUrlMatchBlackList('https://sub.heroku.com/', dictionary)).toBe(true);
+        expect(matcher.doesUrlMatchBlackList('http://www.sub.salesforce.com/', dictionary)).toBe(true);
+    });
 });