From 231720995961625f56d8a2e477693251fa30a285 Mon Sep 17 00:00:00 2001 From: Aga Date: Fri, 16 Jun 2023 12:56:02 +0100 Subject: [PATCH 01/10] add match for fragment identifier to the http url regex --- portality/regex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portality/regex.py b/portality/regex.py index c50f053f7f..7c5773855d 100644 --- a/portality/regex.py +++ b/portality/regex.py @@ -17,7 +17,7 @@ BIG_END_DATE_COMPILED = re.compile(BIG_END_DATE) #~~URL:Regex~~ -HTTP_URL = r'^https?://([^/:]+\.[a-z]{2,63}|([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]+)?(\/.*)?$' +HTTP_URL = r'^https?://([^/:]+\.[a-z]{2,63}|([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]+)?(\/.*)?(#.*)?$' HTTP_URL_COMPILED = re.compile(HTTP_URL, re.IGNORECASE) From 053c610ed294c9a6eeb0bc9e52cc6e11f4a66019 Mon Sep 17 00:00:00 2001 From: Ramakrishna Date: Tue, 4 Jul 2023 18:17:13 +0530 Subject: [PATCH 02/10] Added combined event handler to send events to both kafka and shortcircuit --- portality/bll/services/events.py | 1 + portality/events/combined.py | 11 +++++++++++ portality/events/kafka_consumer.py | 4 +++- 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 portality/events/combined.py diff --git a/portality/bll/services/events.py b/portality/bll/services/events.py index 2b27b85beb..804989b350 100644 --- a/portality/bll/services/events.py +++ b/portality/bll/services/events.py @@ -57,6 +57,7 @@ def consume(self, event): for consumer in self.EVENT_CONSUMERS: try: if consumer.consumes(event): + pass consumer.consume(event) except Exception as e: app.logger.error("Error in consumer {x}: {e}".format(e=str(e), x=consumer.ID)) diff --git a/portality/events/combined.py b/portality/events/combined.py new file mode 100644 index 0000000000..869d63ab88 --- /dev/null +++ b/portality/events/combined.py @@ -0,0 +1,11 @@ +from portality.events.shortcircuit import send_event as shortcircuit_send_event +from portality.core import app + + +def send_event(event): + try: + from 
portality.events.kafka_producer import send_event as kafka_send_event + kafka_send_event(event) + except Exception as e: + app.logger.exception("Failed to send event to Kafka. " + str(e)) + shortcircuit_send_event(event) diff --git a/portality/events/kafka_consumer.py b/portality/events/kafka_consumer.py index 77c812b6e2..bb75a298ae 100644 --- a/portality/events/kafka_consumer.py +++ b/portality/events/kafka_consumer.py @@ -17,7 +17,9 @@ async def handle_event(stream): with doajapp.test_request_context("/"): svc = DOAJ.eventsService() async for event in stream: - svc.consume(Event(raw=json.loads(event))) + pass + # TODO uncomment the following line once the Event model is fixed to Kafka + # svc.consume(Event(raw=json.loads(event))) if __name__ == '__main__': From dd16de86546161580ef634f306fda1cc81e4f2f7 Mon Sep 17 00:00:00 2001 From: Ramakrishna Date: Tue, 4 Jul 2023 18:29:46 +0530 Subject: [PATCH 03/10] Undo the changes --- portality/bll/services/events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/portality/bll/services/events.py b/portality/bll/services/events.py index 804989b350..2b27b85beb 100644 --- a/portality/bll/services/events.py +++ b/portality/bll/services/events.py @@ -57,7 +57,6 @@ def consume(self, event): for consumer in self.EVENT_CONSUMERS: try: if consumer.consumes(event): - pass consumer.consume(event) except Exception as e: app.logger.error("Error in consumer {x}: {e}".format(e=str(e), x=consumer.ID)) From be55e81cf7711f766b254805f54840e4539eaf6b Mon Sep 17 00:00:00 2001 From: Aga Date: Fri, 7 Jul 2023 12:14:35 +0100 Subject: [PATCH 04/10] add ut for url regex --- doajtest/fixtures/urls.py | 12 ++++++++++++ doajtest/unit/test_regexes.py | 14 ++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 doajtest/fixtures/urls.py diff --git a/doajtest/fixtures/urls.py b/doajtest/fixtures/urls.py new file mode 100644 index 0000000000..9329859439 --- /dev/null +++ b/doajtest/fixtures/urls.py @@ -0,0 +1,12 @@ 
+VALID_URL_LISTS = [ + "https://www.sunshine.com", + "http://www.moonlight.com", + "https://www.cosmos.com#galaxy", + "https://www.cosmos.com/galaxy", + "https://www.cosmos.com/galaxy#peanut" +] + +INVALID_URL_LISTS = [ + "ht:www", + "nonexistent.com" +] \ No newline at end of file diff --git a/doajtest/unit/test_regexes.py b/doajtest/unit/test_regexes.py index c8f41b887e..8f3fcb435a 100644 --- a/doajtest/unit/test_regexes.py +++ b/doajtest/unit/test_regexes.py @@ -1,9 +1,9 @@ """ Gather and test DOAJ regexes here """ from doajtest.helpers import DoajTestCase -from doajtest.fixtures import dois, issns +from doajtest.fixtures import dois, issns, urls -from portality.regex import DOI_COMPILED, ISSN_COMPILED +from portality.regex import DOI_COMPILED, ISSN_COMPILED, HTTP_URL_COMPILED import re @@ -41,3 +41,13 @@ def test_02_ISSN_regex(self): for x in issns.INVLAID_ISSN_LIST: assert not issn_regex.match(x), x + + def test_03_URL_regex(self): + """ Check that the URL regex performs correctly. 
""" + url_regex = HTTP_URL_COMPILED + + for i in urls.VALID_URL_LISTS: + assert url_regex.match(i), i + + for x in urls.INVALID_URL_LISTS: + assert not url_regex.match(x), x From 319fabd1feb049524eb38a3fdd7cabc9cb49f17a Mon Sep 17 00:00:00 2001 From: Aga Date: Tue, 18 Jul 2023 14:08:17 +0100 Subject: [PATCH 05/10] add unit test to cover multiple urls --- doajtest/fixtures/urls.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doajtest/fixtures/urls.py b/doajtest/fixtures/urls.py index 9329859439..0e44633849 100644 --- a/doajtest/fixtures/urls.py +++ b/doajtest/fixtures/urls.py @@ -8,5 +8,6 @@ INVALID_URL_LISTS = [ "ht:www", - "nonexistent.com" + "nonexistent.com", + "https://www.doaj.org and https://www.reddit.com" ] \ No newline at end of file From 653f4bc98f4d0d0f11182c44cd214df73f51aaa1 Mon Sep 17 00:00:00 2001 From: Aga Date: Mon, 31 Jul 2023 09:53:37 +0100 Subject: [PATCH 06/10] Increase number of results for publishers countries facet to 200 - exceeds the number of countries in the World for today --- portality/static/js/edges/public.journal.edge.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portality/static/js/edges/public.journal.edge.js b/portality/static/js/edges/public.journal.edge.js index abb20962f9..47d366d55d 100644 --- a/portality/static/js/edges/public.journal.edge.js +++ b/portality/static/js/edges/public.journal.edge.js @@ -169,7 +169,7 @@ $.extend(true, doaj, { category: "facet", field: "index.country.exact", display: "Publishers' countries", - size: 100, + size: 200, syncCounts: false, lifecycle: "update", updateType: "fresh", From 783842da8ccb206a915ed22b49b8f5e1b63b0f6d Mon Sep 17 00:00:00 2001 From: Steve Eardley Date: Mon, 31 Jul 2023 16:50:44 +0100 Subject: [PATCH 07/10] Version bump for country facet change --- portality/settings.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/portality/settings.py b/portality/settings.py index 9bd5a61d92..152e3ec2d4 
100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -9,7 +9,7 @@ # Application Version information # ~~->API:Feature~~ -DOAJ_VERSION = "6.3.9" +DOAJ_VERSION = "6.3.10" API_VERSION = "3.0.1" ###################################### diff --git a/setup.py b/setup.py index 09ee64d48a..571b37f957 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='doaj', - version='6.3.9', + version='6.3.10', packages=find_packages(), install_requires=[ "awscli==1.20.50", From 82e20d8d7880c9aaf4db186de5ab425c44b89b77 Mon Sep 17 00:00:00 2001 From: Steve Eardley Date: Wed, 2 Aug 2023 11:30:44 +0100 Subject: [PATCH 08/10] Use combined sender with kafka machine on test --- test.cfg | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test.cfg b/test.cfg index b6b18f3b15..b41b5f9eb1 100644 --- a/test.cfg +++ b/test.cfg @@ -64,7 +64,12 @@ PUBLIC_REGISTER = True LOGIN_VIA_ACCOUNT_ID = True # 2022-12-09 enable the shorcircuit handler until we can fix kafka -EVENT_SEND_FUNCTION = "portality.events.shortcircuit.send_event" +#EVENT_SEND_FUNCTION = "portality.events.shortcircuit.send_event" + +# 2023-08-02 try out the combined event sender +EVENT_SEND_FUNCTION = "portality.events.combined.send_event" +KAFKA_BROKER = "kafka://167.99.207.136:9092" +KAFKA_BOOTSTRAP_SERVER = "167.99.207.136:9092" # No plausible on test PLAUSIBLE_URL = None From 0dc5608d1965cc031b019117e5f57d1b80a5254d Mon Sep 17 00:00:00 2001 From: Steve Eardley Date: Thu, 3 Aug 2023 11:25:44 +0100 Subject: [PATCH 09/10] Update to use internal IP for kafka --- test.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test.cfg b/test.cfg index b41b5f9eb1..085086a956 100644 --- a/test.cfg +++ b/test.cfg @@ -68,8 +68,8 @@ LOGIN_VIA_ACCOUNT_ID = True # 2023-08-02 try out the combined event sender EVENT_SEND_FUNCTION = "portality.events.combined.send_event" -KAFKA_BROKER = "kafka://167.99.207.136:9092" -KAFKA_BOOTSTRAP_SERVER = "167.99.207.136:9092" +KAFKA_BROKER = 
"kafka://10.131.35.14:9092" +KAFKA_BOOTSTRAP_SERVER = "10.131.35.14:9092" # No plausible on test PLAUSIBLE_URL = None From 35a7367acccb7a2f1344dd2ad2a2bced9949a542 Mon Sep 17 00:00:00 2001 From: Ramakrishna Date: Thu, 3 Aug 2023 16:28:51 +0530 Subject: [PATCH 10/10] added log to check the count of events --- portality/events/kafka_consumer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/portality/events/kafka_consumer.py b/portality/events/kafka_consumer.py index bb75a298ae..0ce1e1120e 100644 --- a/portality/events/kafka_consumer.py +++ b/portality/events/kafka_consumer.py @@ -11,13 +11,17 @@ app = faust.App('events', broker=broker, value_serializer='json') topic = app.topic(topic_name) +event_counter = 0 + @app.agent(topic) async def handle_event(stream): + global event_counter with doajapp.test_request_context("/"): svc = DOAJ.eventsService() async for event in stream: - pass + event_counter += 1 + doajapp.logger.info(f"Kafka event count {event_counter}") # TODO uncomment the following line once the Event model is fixed to Kafka # svc.consume(Event(raw=json.loads(event)))