diff --git a/docker1hw/Dockerfile b/docker1hw/Dockerfile
new file mode 100644
index 0000000..b751463
--- /dev/null
+++ b/docker1hw/Dockerfile
@@ -0,0 +1,9 @@
+FROM python:3.10.2-alpine3.15
+# WORKDIR creates the directory when it is missing, so no separate mkdir is needed
+WORKDIR /root/workspace/src
+# Install required packages before copying the script so this layer stays cached
+# when only the script changes; --no-cache-dir keeps pip's download cache out of the image
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir requests bs4 html5lib psycopg2-binary
+# Copy the scraper into the project directory
+COPY ./web_scraping_sample.py /root/workspace/src/
diff --git a/docker1hw/docker-compose.yaml b/docker1hw/docker-compose.yaml
new file mode 100644
index 0000000..cad1491
--- /dev/null
+++ b/docker1hw/docker-compose.yaml
@@ -0,0 +1,37 @@
+version: "3"
+services:
+  # NOTE: "pyhton" is a typo, kept as-is so commands that reference this service name keep working
+  pyhton_service:
+    build:
+      context: ./
+      dockerfile: Dockerfile
+    image: workshop1
+    container_name: workshop_python_container
+    stdin_open: true # docker attach container_id
+    tty: true
+    ports:
+      - "8000:8000"
+    environment:
+      # Inside the Compose network the database is reached by its service name, not localhost
+      DB_HOST: postgres_service
+    volumes:
+      - .:/app
+    depends_on:
+      - postgres_service
+
+  postgres_service:
+    # Pin a major version instead of the implicit "latest" tag so rebuilds stay reproducible
+    image: postgres:15
+    container_name: workshop_postgres_container
+    ports:
+      - "5432:5432"
+    environment:
+      POSTGRES_PASSWORD: admin
+      # Create the database that web_scraping_sample.py connects to
+      POSTGRES_DB: mydatabase
+    volumes:
+      # Named volume on the image's data directory so the data survives container re-creation
+      - postgres_data:/var/lib/postgresql/data
+
+volumes:
+  postgres_data:
diff --git a/docker1hw/web_scraping_sample.py b/docker1hw/web_scraping_sample.py
new file mode 100644
index 0000000..00b938b
--- /dev/null
+++ b/docker1hw/web_scraping_sample.py
@@ -0,0 +1,39 @@
+import os
+
+import requests
+from bs4 import BeautifulSoup
+import psycopg2
+
+# Scrape post titles and date headers from the Python blog and store them in PostgreSQL.
+url = 'https://blog.python.org/'
+# Bound the wait and fail fast on an HTTP error instead of parsing an error page
+response = requests.get(url, timeout=10)
+response.raise_for_status()
+
+soup = BeautifulSoup(response.content, 'html.parser')
+
+titles = soup.find_all('h3', class_='post-title')
+dates = soup.find_all('h2', class_='date-header')
+
+# Titles and dates are paired by position; zip stops at the shorter list, so a
+# count mismatch no longer raises IndexError.
+# NOTE(review): positional pairing assumes one date header per post - confirm against the page markup.
+rows = [(title.get_text().strip(), date.get_text().strip()) for title, date in zip(titles, dates)]
+
+# DB_HOST lets the container reach the database by its Compose service name;
+# the default keeps the script working against a local server.
+conn = psycopg2.connect(
+    database="mydatabase",
+    user="postgres",
+    password="admin",
+    host=os.environ.get("DB_HOST", "localhost"),
+    port="5432",
+)
+try:
+    # "with conn" commits on success and rolls back on error; the cursor block closes the cursor
+    with conn, conn.cursor() as cur:
+        cur.execute('CREATE TABLE IF NOT EXISTS blog (id SERIAL PRIMARY KEY, title TEXT, date DATE)')
+        cur.executemany('INSERT INTO blog (title, date) VALUES (%s, %s)', rows)
+finally:
+    # Leaving the "with" block does not close the connection, so close it explicitly
+    conn.close()