From 76e8077f46b4dc99a0e2331b14aa87e631c2e1af Mon Sep 17 00:00:00 2001 From: c0d3d3v Date: Sat, 4 Jun 2022 01:39:54 +0200 Subject: [PATCH] download also modules that are not listed on the main page --- moodle_dl/moodle_connector/forums_handler.py | 22 +++++-- moodle_dl/moodle_connector/lessons_handler.py | 34 +++++++++- moodle_dl/moodle_connector/results_handler.py | 65 +++++++++++++++++++ moodle_dl/utils/cutie.py | 1 - moodle_dl/utils/readchar/key_linux.py | 42 +++--------- moodle_dl/utils/readchar/key_windows.py | 2 +- moodle_dl/utils/readchar/read_linux.py | 6 +- 7 files changed, 127 insertions(+), 45 deletions(-) diff --git a/moodle_dl/moodle_connector/forums_handler.py b/moodle_dl/moodle_connector/forums_handler.py index a7657733..537aa40f 100644 --- a/moodle_dl/moodle_connector/forums_handler.py +++ b/moodle_dl/moodle_connector/forums_handler.py @@ -168,7 +168,7 @@ def fetch_forums_posts(self, forums: {}, last_timestamps_per_forum: {}) -> {}: def _get_files_of_discussions(self, latest_discussions: []) -> []: result = [] - for i, discussion in enumerate(latest_discussions): + for counter, discussion in enumerate(latest_discussions): valid_subject = PathTools.to_valid_name(discussion.get('subject', '')) shorted_discussion_name = valid_subject if len(shorted_discussion_name) > 17: @@ -181,7 +181,7 @@ def _get_files_of_discussions(self, latest_discussions: []) -> []: '\r' + 'Downloading posts of discussion' + f' [{shorted_discussion_name:<17}|{discussion_id:6}]' - + f' {i:3d}/{(len(latest_discussions) - 1):3d}\033[K' + + f' {counter + 1:3d}/{len(latest_discussions):3d}\033[K' ), end='', ) @@ -216,8 +216,22 @@ def _get_files_of_discussions(self, latest_discussions: []) -> []: datetime.utcfromtimestamp(discussion_created).strftime('%y-%m-%d') + ' ' + valid_subject ) - post_files = post.get('messageinlinefiles', []) - post_files += post.get('attachments', []) + post_files = post.get('attachments', []) + for inlinefile in post.get('messageinlinefiles', []): +
new_inlinefile = True + for attachment in post_files: + if attachment.get('fileurl', '').replace('attachment', 'post') == inlinefile.get('fileurl', ''): + if ( + attachment.get('filesize', 0) == inlinefile.get('filesize', 0) + # we assume that inline attachments can have different timestamps than the actual + # attachment. However, they are still the same file. + # and attachment.get('timemodified', 0) == inlinefile.get('timemodified', 0) + and attachment.get('filename', '') == inlinefile.get('filename', '') + ): + new_inlinefile = False + break + if new_inlinefile: + post_files.append(inlinefile) post_file = { 'filename': post_filename, diff --git a/moodle_dl/moodle_connector/lessons_handler.py b/moodle_dl/moodle_connector/lessons_handler.py index 6d33457e..673755a7 100644 --- a/moodle_dl/moodle_connector/lessons_handler.py +++ b/moodle_dl/moodle_connector/lessons_handler.py @@ -1,3 +1,5 @@ +import re + from moodle_dl.moodle_connector.request_helper import RequestHelper, RequestRejectedError from moodle_dl.state_recorder.course import Course from moodle_dl.download_service.path_tools import PathTools @@ -163,7 +165,7 @@ def _get_files_of_attempt(self, attempt_result: {}, lesson_name: str) -> []: lesson_html = moodle_html_header attempt_filename = PathTools.to_valid_name(lesson_name) lesson_is_empty = True - for answerpage in answerpages: + for counter, answerpage in enumerate(answerpages): page_id = answerpage.get('page', {}).get('id', 0) lesson_id = answerpage.get('page', {}).get('lessonid', 0) @@ -171,6 +173,16 @@ def _get_files_of_attempt(self, attempt_result: {}, lesson_name: str) -> []: if len(shorted_lesson_name) > 17: shorted_lesson_name = shorted_lesson_name[:15] + '..' 
+ print( + ( + '\r' + + 'Downloading lesson pages' + + f' {counter + 1:3d}/{len(answerpages):3d}' + + f' [{shorted_lesson_name:<17}|{lesson_id:6}]\033[K' + ), + end='', + ) + data = {'lessonid': lesson_id, 'pageid': page_id, 'returncontents': 1} try: @@ -190,7 +202,24 @@ def _get_files_of_attempt(self, attempt_result: {}, lesson_name: str) -> []: file_type = page_file.get('type', '') if file_type is None or file_type == '': page_file.update({'type': 'lesson_file'}) - result.append(page_file) + + for page_file in page_files: + new_page_file = True + for attempt_file in result: + if re.sub(r"\/page_contents\/\d+\/", "/", attempt_file.get('fileurl', '')) == re.sub( + r"\/page_contents\/\d+\/", "/", page_file.get('fileurl', '') + ): + if ( + attempt_file.get('filesize', 0) == page_file.get('filesize', 0) + # sometimes the teacher adds the same file for multiple answer pages with a + # different timestamp + # and attempt_file.get('timemodified', 0) == page_file.get('timemodified', 0) + and attempt_file.get('filename', '') == page_file.get('filename', '') + ): + new_page_file = False + break + if new_page_file: + result.append(page_file) if not lesson_is_empty: lesson_html += moodle_html_footer @@ -199,6 +228,7 @@ def _get_files_of_attempt(self, attempt_result: {}, lesson_name: str) -> []: 'filepath': '/', 'timemodified': 0, 'html': lesson_html, + 'filter_urls_during_search_containing': ['/mod_lesson/page_contents/'], 'type': 'html', 'no_search_for_urls': True, } diff --git a/moodle_dl/moodle_connector/results_handler.py b/moodle_dl/moodle_connector/results_handler.py index 909edcb4..51a9c8bc 100644 --- a/moodle_dl/moodle_connector/results_handler.py +++ b/moodle_dl/moodle_connector/results_handler.py @@ -65,6 +65,8 @@ def _get_files_in_sections(self, course_sections: []) -> [File]: files += self._get_files_in_modules(section_name, section_id, section_modules) + files += self._get_files_not_on_main_page() + return files def _get_files_in_modules(self, section_name: str, 
section_id: int, section_modules: []) -> [File]: @@ -123,6 +125,7 @@ def _get_files_in_modules(self, section_name: str, section_id: int, section_modu if module_modname in self.course_fetch_addons: # find addon with same module_id addon = self.course_fetch_addons.get(module_modname, {}).get(module_id, {}) + addon['on_main_page'] = True addon_files = addon.get('files', []) module_contents += addon_files @@ -133,6 +136,7 @@ def _get_files_in_modules(self, section_name: str, section_id: int, section_modu elif module_modname in self.course_fetch_addons: # find addon with same module_id addon = self.course_fetch_addons.get(module_modname, {}).get(module_id, {}) + addon['on_main_page'] = True addon_files = addon.get('files', []) files += self._handle_files( @@ -143,6 +147,67 @@ def _get_files_in_modules(self, section_name: str, section_id: int, section_modu return files + def _get_modplural(self, modname: str) -> str: + modplural_dict = { + 'assign': 'Assignments', + 'database': 'Databases', + 'folder': 'Folders', + 'forum': 'Forums', + 'lesson': 'Lessons', + 'page': 'Pages', + 'quiz': 'Quizzes', + 'workshop': 'Workshops', + } + if modname in modplural_dict: + return modplural_dict[modname] + else: + return modname.capitalize() + + def _get_files_not_on_main_page(self) -> [File]: + """ + Iterates over all addons to find files (or content) that are not listed on the main page. + @return: A list of files of addons not on the main page.
+ """ + files = [] + for addon_modname in self.course_fetch_addons: + section_name = f"{self._get_modplural(addon_modname)} not on main page" + section_id = -1 + + for addon_module_id in self.course_fetch_addons[addon_modname]: + addon = self.course_fetch_addons[addon_modname][addon_module_id] + if 'on_main_page' in addon: + continue + + module_name = addon.get('name', '') + module_modname = addon_modname + module_id = addon.get('id', 0) + module_intro = addon.get('intro', None) + module_contents = addon.get('files', []) + + # Handle not supported modules that results in an index.html special + if module_modname in ['page'] and self.version < 2017051500: + module_modname = 'index_mod-' + module_modname + + if module_intro is not None and module_modname not in [ + 'page', + 'forum', + 'database', + 'lesson', + 'quiz', + 'workshop', + 'assign', + ]: + # Handle descriptions of Files, Labels and all that we do not handle in separate modules + files += self._handle_description( + section_name, section_id, module_name, module_modname, module_id, module_intro + ) + + files += self._handle_files( + section_name, section_id, module_name, module_modname, module_id, module_contents + ) + + return files + @staticmethod def _filter_changing_attributes(description: str) -> str: """ diff --git a/moodle_dl/utils/cutie.py b/moodle_dl/utils/cutie.py index d51fff67..579d9626 100644 --- a/moodle_dl/utils/cutie.py +++ b/moodle_dl/utils/cutie.py @@ -18,7 +18,6 @@ import moodle_dl.utils.readchar as readchar - terminal = os.getenv('TERM') if terminal is None: init() diff --git a/moodle_dl/utils/readchar/key_linux.py b/moodle_dl/utils/readchar/key_linux.py index a4c1c85b..2fb1cb85 100644 --- a/moodle_dl/utils/readchar/key_linux.py +++ b/moodle_dl/utils/readchar/key_linux.py @@ -56,14 +56,14 @@ F2 = "\x1b\x4f\x51" F3 = "\x1b\x4f\x52" F4 = "\x1b\x4f\x53" -F5 = "\x1b\x4f\x31\x35\x7e" -F6 = "\x1b\x4f\x31\x37\x7e" -F7 = "\x1b\x4f\x31\x38\x7e" -F8 = "\x1b\x4f\x31\x39\x7e" -F9 =
"\x1b\x4f\x32\x30\x7e" -F10 = "\x1b\x4f\x32\x31\x7e" -F11 = "\x1b\x4f\x32\x33\x7e" -F12 = "\x1b\x4f\x32\x34\x7e" +F5 = "\x1b\x5b\x31\x35\x7e" +F6 = "\x1b\x5b\x31\x37\x7e" +F7 = "\x1b\x5b\x31\x38\x7e" +F8 = "\x1b\x5b\x31\x39\x7e" +F9 = "\x1b\x5b\x32\x30\x7e" +F10 = "\x1b\x5b\x32\x31\x7e" +F11 = "\x1b\x5b\x32\x33\x7e" +F12 = "\x1b\x5b\x32\x34\x7e" PAGE_UP = "\x1b\x5b\x35\x7e" PAGE_DOWN = "\x1b\x5b\x36\x7e" @@ -72,29 +72,3 @@ INSERT = "\x1b\x5b\x32\x7e" SUPR = "\x1b\x5b\x33\x7e" - - -ESCAPE_SEQUENCES = ( - ESC, - ESC + "\x5b", - ESC + "\x5b" + "\x31", - ESC + "\x5b" + "\x32", - ESC + "\x5b" + "\x33", - ESC + "\x5b" + "\x35", - ESC + "\x5b" + "\x36", - ESC + "\x5b" + "\x31" + "\x35", - ESC + "\x5b" + "\x31" + "\x36", - ESC + "\x5b" + "\x31" + "\x37", - ESC + "\x5b" + "\x31" + "\x38", - ESC + "\x5b" + "\x31" + "\x39", - ESC + "\x5b" + "\x32" + "\x30", - ESC + "\x5b" + "\x32" + "\x31", - ESC + "\x5b" + "\x32" + "\x32", - ESC + "\x5b" + "\x32" + "\x33", - ESC + "\x5b" + "\x32" + "\x34", - ESC + "\x4f", - ESC + ESC, - ESC + ESC + "\x5b", - ESC + ESC + "\x5b" + "\x32", - ESC + ESC + "\x5b" + "\x33", -) diff --git a/moodle_dl/utils/readchar/key_windows.py b/moodle_dl/utils/readchar/key_windows.py index 89e2b532..0de3dccd 100644 --- a/moodle_dl/utils/readchar/key_windows.py +++ b/moodle_dl/utils/readchar/key_windows.py @@ -17,7 +17,7 @@ CTRL_E = "\x05" CTRL_F = "\x06" CTRL_G = "\x07" -CTRL_H = "\x08" +CTRL_H = BACKSPACE CTRL_I = "\t" CTRL_J = "\n" CTRL_K = "\x0b" diff --git a/moodle_dl/utils/readchar/read_linux.py b/moodle_dl/utils/readchar/read_linux.py index 1dfc27cc..fa6624b5 100644 --- a/moodle_dl/utils/readchar/read_linux.py +++ b/moodle_dl/utils/readchar/read_linux.py @@ -41,15 +41,15 @@ def readkey(): return c1 c2 = readchar(blocking=True) - if c2 not in ["\x4F", "\x5B"]: + if c2 not in "\x4F\x5B": return c1 + c2 c3 = readchar(blocking=True) - if c3 not in ["\x31", "\x32", "\x33", "\x35", "\x36"]: + if c3 not in "\x31\x32\x33\x35\x36": return c1 + c2 + c3 c4 = 
readchar(blocking=True) - if c2 != "\x4F" or c4 not in ["\x30", "\x31", "\x33", "\x34", "\x35", "\x37", "\x38", "\x39"]: + if c4 not in "\x30\x31\x33\x34\x35\x37\x38\x39": return c1 + c2 + c3 + c4 c5 = readchar(blocking=True)