Skip to content

Commit

Permalink
fix: revert
Browse files Browse the repository at this point in the history
  • Loading branch information
VinciGit00 committed Dec 10, 2024
1 parent b5978b4 commit 22a8e05
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 76 deletions.
3 changes: 0 additions & 3 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,6 @@ It is possible to run this project locally using Python, with the command on your
```bash
streamlit run main.py
```
## News 📰

- ScrapegraphAI now has its APIs! Check them out [here](https://scrapegraphai.com)!

## 🤝 Contributing

Expand Down
2 changes: 1 addition & 1 deletion current_value.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
20866
13154
57 changes: 30 additions & 27 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,12 @@
playwright_install,
add_download_options
)
from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger

st.set_page_config(page_title="Scrapegraph-ai demo", page_icon="🕷️")

# Install playwright browsers
playwright_install()

# Initialize logger
sgai_logger.set_logging(level="INFO")

def save_email(email):
    """Append one email address to the mails.txt log, one address per line."""
    with open("mails.txt", "a") as mail_log:
        mail_log.write(email + "\n")
Expand All @@ -45,35 +40,43 @@ def save_email(email):
left_co, cent_co, last_co = st.columns(3)
with cent_co:
st.image("assets/scrapegraphai_logo.png")
st.title('Scrapegraph-api')
st.write("refill at this page")

# Use password input for API key to mask it
st.write("### You can buy the API credits [here](https://scrapegraphai.com)")

api_key = st.text_input('Enter your API key:', type="password", help="API key must start with 'sgai-'")
# Get the API key, URL, prompt, and optional schema from the user
api_key = st.text_input('Enter your API key:')
url = st.text_input('Enter the URL to scrape:')
prompt = st.text_input('Enter your prompt:')
schema = st.text_input('Enter your optional schema (leave blank if not needed):')

# When the user clicks the 'Scrape' button
if st.button('Scrape'):
if not api_key.startswith('sgai-'):
st.error("Invalid API key format. API key must start with 'sgai-'")
elif not url:
st.error("Please enter a URL to scrape")
elif not prompt:
st.error("Please enter a prompt")
# Set up the headers and payload for the API request
headers = {'Content-Type': 'application/json'}
payload = {
'api_key': api_key,
'url': url,
'prompt': prompt,
'schema': schema
}

# Make the API request
response = requests.post('https://api.scrapegraphai.com/smart_scraper', headers=headers, data=json.dumps(payload))

# If the request was successful
if response.status_code == 200:
# Parse the JSON response
data = response.json()

# Display the extracted data
st.write(data['result'])

# Display the remaining credits
st.write(f"Remaining credits: {data['credits_left']}")

# If the request was unsuccessful
else:
try:
sgai_client = Client(api_key=api_key)
response = sgai_client.smartscraper(
website_url=url,
user_prompt=prompt
)
st.write(f"Request ID: {response['request_id']}")
st.write(f"Result: {response['result']}")
except Exception as e:
st.error(f"Error: {str(e)}")
finally:
sgai_client.close()
st.write(f"Error: {response.status_code}")


left_co2, *_, cent_co2, last_co2, last_c3 = st.columns([1] * 18)
Expand Down
55 changes: 21 additions & 34 deletions pages/2_🦿_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,49 +3,36 @@
from text_to_speech import text_to_speech

key = st.text_input("Openai API key", type="password")
model = st.radio(
"Select the model",
["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-4", "text-to-speech", "gpt-4o", "gpt-4o-mini"],
index=0,
)

url = st.text_input("base url (optional)")
link_to_scrape = st.text_input("Link to scrape")
prompt = st.text_input("Write the prompt")

def add_download_options(result):
    """
    Adds CSV and JSON download buttons for the scraping results.

    Args:
        result: The result DataFrame to make downloadable. May be None or
            empty, in which case no buttons are rendered.
            NOTE(review): assumes a pandas.DataFrame — `.to_csv` / `.to_json`
            are called below; confirm callers never pass a plain dict/list.
    """
    # BUG FIX: `if result:` raises ValueError on a DataFrame ("The truth
    # value of a DataFrame is ambiguous"). Check None/empty explicitly.
    if result is None or len(result) == 0:
        return

    # Convert result to CSV
    csv = result.to_csv(index=False)
    st.download_button(
        label="Download CSV",
        data=csv,
        file_name="scraping_results.csv",
        mime="text/csv"
    )

    # Optionally, also add JSON option
    json_str = result.to_json(orient="records")
    st.download_button(
        label="Download JSON",
        data=json_str,
        file_name="scraping_results.json",
        mime="application/json"
    )

if st.button("Run the program", type="primary"):
if not key or not link_to_scrape or not prompt:
if not key or not model or not link_to_scrape or not prompt:
st.error("Please fill in all fields except the base URL, which is optional.")
else:
st.write("Scraping phase started ...")

if url:
graph_result = task(key, link_to_scrape, prompt, base_url=url)
if model == "text-to-speech":
res = text_to_speech(key, prompt, link_to_scrape)
st.write(res["answer"])
st.audio(res["audio"])
else:
graph_result = task(key, link_to_scrape, prompt)
# Pass url only if it's provided
if url:
graph_result = task(key, link_to_scrape, prompt, model, base_url=url)
else:
graph_result = task(key, link_to_scrape, prompt, model)

print(graph_result)
st.write("# Answer")
st.write(graph_result)
print(graph_result)
st.write("# Answer")
st.write(graph_result)

if graph_result:
add_download_options(graph_result)
if graph_result:
add_download_options(graph_result)
11 changes: 4 additions & 7 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
boto3==1.35.36
langchain_core==0.3.21
pandas==2.2.3
Requests==2.32.3
scrapegraph-py==1.9.0b2
scrapegraphai==1.32.0
streamlit==1.39.0
pandas>=2.2.2
scrapegraphai==1.12.0
scrapegraphai
streamlit>=1.0
11 changes: 7 additions & 4 deletions task.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,18 @@ def task(key:str, url:str, prompt:str, model:str, base_url=None):
graph_config = {
"llm": {
"api_key": key,
"model": "openai/gpt-4",
"model": model,
"openai_api_base": base_url,
},
}

print(prompt)
print(url)
print(graph_config)
# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
prompt=prompt,
# also accepts a string with the already downloaded HTML code
source=url,
config=graph_config
)
Expand Down

0 comments on commit 22a8e05

Please sign in to comment.