diff --git a/doajtest/fixtures/urls.py b/doajtest/fixtures/urls.py
new file mode 100644
index 0000000000..0e44633849
--- /dev/null
+++ b/doajtest/fixtures/urls.py
@@ -0,0 +1,13 @@
+VALID_URL_LISTS = [
+    "https://www.sunshine.com",
+    "http://www.moonlight.com",
+    "https://www.cosmos.com#galaxy",
+    "https://www.cosmos.com/galaxy",
+    "https://www.cosmos.com/galaxy#peanut"
+]
+
+INVALID_URL_LISTS = [
+    "ht:www",
+    "nonexistent.com",
+    "https://www.doaj.org and https://www.reddit.com"
+]
\ No newline at end of file
diff --git a/doajtest/unit/test_regexes.py b/doajtest/unit/test_regexes.py
index c8f41b887e..8f3fcb435a 100644
--- a/doajtest/unit/test_regexes.py
+++ b/doajtest/unit/test_regexes.py
@@ -1,9 +1,9 @@
 """ Gather and test DOAJ regexes here """
 
 from doajtest.helpers import DoajTestCase
-from doajtest.fixtures import dois, issns
+from doajtest.fixtures import dois, issns, urls
 
-from portality.regex import DOI_COMPILED, ISSN_COMPILED
+from portality.regex import DOI_COMPILED, ISSN_COMPILED, HTTP_URL_COMPILED
 
 import re
 
@@ -41,3 +41,13 @@ def test_02_ISSN_regex(self):
 
         for x in issns.INVLAID_ISSN_LIST:
             assert not issn_regex.match(x), x
+
+    def test_03_URL_regex(self):
+        """ Check that the URL regex performs correctly. """
+        url_regex = HTTP_URL_COMPILED
+
+        for i in urls.VALID_URL_LISTS:
+            assert url_regex.match(i), i
+
+        for x in urls.INVALID_URL_LISTS:
+            assert not url_regex.match(x), x
diff --git a/portality/events/combined.py b/portality/events/combined.py
new file mode 100644
index 0000000000..869d63ab88
--- /dev/null
+++ b/portality/events/combined.py
@@ -0,0 +1,11 @@
+from portality.events.shortcircuit import send_event as shortcircuit_send_event
+from portality.core import app
+
+
+def send_event(event):
+    try:
+        from portality.events.kafka_producer import send_event as kafka_send_event
+        kafka_send_event(event)
+    except Exception as e:
+        app.logger.exception("Failed to send event to Kafka. " + str(e))
+        shortcircuit_send_event(event)
diff --git a/portality/events/kafka_consumer.py b/portality/events/kafka_consumer.py
index 77c812b6e2..0ce1e1120e 100644
--- a/portality/events/kafka_consumer.py
+++ b/portality/events/kafka_consumer.py
@@ -11,13 +11,19 @@
 app = faust.App('events', broker=broker, value_serializer='json')
 topic = app.topic(topic_name)
 
+event_counter = 0
+
 
 @app.agent(topic)
 async def handle_event(stream):
+    global event_counter
     with doajapp.test_request_context("/"):
         svc = DOAJ.eventsService()
         async for event in stream:
-            svc.consume(Event(raw=json.loads(event)))
+            event_counter += 1
+            doajapp.logger.info(f"Kafka event count {event_counter}")
+            # TODO uncomment the following line once the Event model is fixed to Kafka
+            # svc.consume(Event(raw=json.loads(event)))
 
 
 if __name__ == '__main__':
diff --git a/portality/regex.py b/portality/regex.py
index c50f053f7f..7c5773855d 100644
--- a/portality/regex.py
+++ b/portality/regex.py
@@ -17,7 +17,7 @@
 BIG_END_DATE_COMPILED = re.compile(BIG_END_DATE)
 
 #~~URL:Regex~~
-HTTP_URL = r'^https?://([^/:]+\.[a-z]{2,63}|([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]+)?(\/.*)?$'
+HTTP_URL = r'^https?://([^/:]+\.[a-z]{2,63}|([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]+)?(\/.*)?(#.*)?$'
 HTTP_URL_COMPILED = re.compile(HTTP_URL, re.IGNORECASE)
 
 
diff --git a/portality/settings.py b/portality/settings.py
index 9ccce6e412..bb94ef8a7d 100644
--- a/portality/settings.py
+++ b/portality/settings.py
@@ -9,7 +9,7 @@
 # Application Version information
 # ~~->API:Feature~~
 
-DOAJ_VERSION = "6.3.9"
+DOAJ_VERSION = "6.3.10"
 API_VERSION = "3.0.1"
 
 ######################################
diff --git a/portality/static/js/edges/public.journal.edge.js b/portality/static/js/edges/public.journal.edge.js
index abb20962f9..47d366d55d 100644
--- a/portality/static/js/edges/public.journal.edge.js
+++ b/portality/static/js/edges/public.journal.edge.js
@@ -169,7 +169,7 @@ $.extend(true, doaj, {
                         category: "facet",
                         field: "index.country.exact",
                         display: "Publishers' countries",
-                        size: 100,
+                        size: 200,
                         syncCounts: false,
                         lifecycle: "update",
                         updateType: "fresh",
diff --git a/setup.py b/setup.py
index 09ee64d48a..571b37f957 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name='doaj',
-    version='6.3.9',
+    version='6.3.10',
     packages=find_packages(),
     install_requires=[
         "awscli==1.20.50",
diff --git a/test.cfg b/test.cfg
index b6b18f3b15..085086a956 100644
--- a/test.cfg
+++ b/test.cfg
@@ -64,7 +64,12 @@ PUBLIC_REGISTER = True
 LOGIN_VIA_ACCOUNT_ID = True
 
 # 2022-12-09 enable the shorcircuit handler until we can fix kafka
-EVENT_SEND_FUNCTION = "portality.events.shortcircuit.send_event"
+#EVENT_SEND_FUNCTION = "portality.events.shortcircuit.send_event"
+
+# 2023-08-02 try out the combined event sender
+EVENT_SEND_FUNCTION = "portality.events.combined.send_event"
+KAFKA_BROKER = "kafka://10.131.35.14:9092"
+KAFKA_BOOTSTRAP_SERVER = "10.131.35.14:9092"
 
 # No plausible on test
 PLAUSIBLE_URL = None