From 6ab30d867a9cddeeccffc4b6fee63cad22b44cdc Mon Sep 17 00:00:00 2001 From: mepavv Date: Sat, 18 Feb 2023 01:40:07 +0100 Subject: [PATCH] Initital commit --- .gitignore | 160 +++++++++++++++++++++++++++++++++++++ LICENSE | 21 +++++ README.md | 45 +++++++++++ diki_translate/__init__.py | 1 + diki_translate/diki.py | 49 ++++++++++++ requirements.txt | 2 + setup.cfg | 2 + setup.py | 20 +++++ 8 files changed, 300 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 diki_translate/__init__.py create mode 100644 diki_translate/diki.py create mode 100644 requirements.txt create mode 100644 setup.cfg create mode 100644 setup.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6769e21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0ba7618 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) 2023 Michał Pawłowski. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..508b4ef --- /dev/null +++ b/README.md @@ -0,0 +1,45 @@ +# diki_translate +Moduł Python'a pozwalający na tłumaczenie z polskiego słownika [diki.pl](https://diki.pl). + +## Instalacja +``` +pip install diki_translate +``` + +## Opis funkcji + +Moduł zawiera klasę `Diki` z funkcją `diki.translation(word, exact_word = 1)` + +`word`, akceptuje string, który masz zamiar przetłumaczyć.\ +`exact_word`, opcjonalny argument akceptujący int (domyślnie 1), określający czy funkcja ma zwrócić dokładne tłumaczenie słowa, np. czy dla słowa 'used' mają zostać wyświetlone tłumaczenia słowa 'use'. + + +## Przykłady użycia +Wszystkie operacje są zaimplementowane w klasie `Diki`. +Wymaga ona podania języka, na który masz zamiar tłumaczyć. + +```python +from diki_translate import Diki + +diki = Diki("english") +``` +Wszystkie dozwolone języki to: +"english", "german", "spanish", "italian", "french". + + +* Przykłady tłumaczenia: + + +```python +>>> list(diki.translation('used')) +['używany', 'przyzwyczajony', 'przywykły'] +``` + +```python +>>> list(diki.translation('used', 0)) +['używany', 'przyzwyczajony', 'przywykły', 'używać', 'korzystać (np. z telefonu, toalety)', 'zużywać', 'wykorzystywać (np. kogoś do swoich celów)',...] +``` +\ +Funkcja zwraca iterator by nie żądać więcej wyników niż trzeba. 
import contextlib
import re


class Diki:
    """Look up translations of a word in the diki.pl online dictionary.

    The language passed to the constructor selects which diki.pl
    dictionary is queried.

    Example:
        >>> diki = Diki("english")
        >>> list(diki.translation("used"))
        ['używany', 'przyzwyczajony', 'przywykły']

    Supported languages:
        "english", "german", "spanish", "italian", "french".
    """

    # Public language name -> slug used in the diki.pl dictionary URL.
    _LANGS = {
        "english": "angielskiego",
        "german": "niemieckiego",
        "spanish": "hiszpanskiego",
        "italian": "wloskiego",
        "french": "francuskiego",
    }

    # <li> elements holding meanings carry a class of "meaning" + digits.
    # Compiled once here instead of once per dictionary entry.
    _MEANING_CLASS = re.compile(r"^meaning\d+")

    # Seconds to wait for diki.pl; without a timeout requests may block
    # indefinitely on an unresponsive server.
    _TIMEOUT = 10

    def __init__(self, lang):
        """Remember which dictionary to query.

        Args:
            lang: One of the supported language names listed on the class.
                It is not validated here; an unsupported value raises
                ``KeyError`` on the first lookup.
        """
        self.lang = lang
        # Parsed HTML of the most recent lookup (None until the first one).
        self.soup = None

    def _bs4_info(self, word):
        """Fetch the diki.pl result page for *word* and parse it.

        The parsed page is stored on ``self.soup`` and also returned.

        Raises:
            KeyError: If ``self.lang`` is not a supported language.
        """
        # Resolve the language first so that an unsupported one fails
        # before any network activity happens.
        try:
            slug = self._LANGS[self.lang]
        except KeyError:
            supported = ", ".join(sorted(self._LANGS))
            raise KeyError(
                f"unsupported language {self.lang!r}; expected one of: {supported}"
            ) from None

        # Third-party dependencies are loaded lazily, on the first lookup.
        import requests
        from bs4 import BeautifulSoup

        # Passing the word through ``params`` lets requests URL-encode it, so
        # characters such as '&', '#' or '+' cannot corrupt the query string.
        result = requests.get(
            f"https://www.diki.pl/slownik-{slug}",
            params={"q": word},
            timeout=self._TIMEOUT,
        )
        self.soup = BeautifulSoup(result.text, "html.parser")
        return self.soup

    def translation(self, word, exact_word=1):
        """Yield the translations found for *word* (without context).

        This is a generator: the page is only requested once iteration
        starts, and translations are produced one at a time.

        Example:
            >>> diki = Diki("english")
            >>> list(diki.translation("use"))
            ['używać', 'korzystać (np. z telefonu, toalety)', 'zużywać', ...]

        Args:
            word: The word to look up.
            exact_word: When equal to 1 (the default), only dictionary
                entries whose headword equals *word* are used. Any other
                value also includes the remaining entries on the result
                page (e.g. the entry for 'use' when looking up 'used').

        Yields:
            The text of each translation, in page order.

        Raises:
            KeyError: If the configured language is not supported.
                Exceptions raised by ``requests.get`` propagate unchanged.
        """
        soup = self._bs4_info(word)

        for entity in soup.find_all("div", "dictionaryEntity"):
            if exact_word == 1:
                headword = entity.find("span", {"class": "hw"})
                # Entries without a headword can never be an exact match.
                if headword is None or headword.text.strip() != word:
                    continue

            for meaning in entity.find_all("li", self._MEANING_CLASS):
                for span in meaning.find_all("span", "hw"):
                    yield span.text