Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

New Extractor! #33

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions mediaflow_proxy/extractors/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ async def _make_request(
async with create_httpx_client() as client:
request_headers = self.base_headers
request_headers.update(headers or {})
print(request_headers)
response = await client.request(
method,
url,
Expand Down
2 changes: 2 additions & 0 deletions mediaflow_proxy/extractors/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from mediaflow_proxy.extractors.uqload import UqloadExtractor
from mediaflow_proxy.extractors.streamtape import StreamtapeExtractor
from mediaflow_proxy.extractors.supervideo import SupervideoExtractor
from mediaflow_proxy.extractors.vixcloud import VixCloudExtractor



Expand All @@ -20,6 +21,7 @@ class ExtractorFactory:
"Mixdrop": MixdropExtractor,
"Streamtape": StreamtapeExtractor,
"Supervideo": SupervideoExtractor,
"VixCloud": VixCloudExtractor,
"LiveTV": LiveTVExtractor,
}

Expand Down
65 changes: 65 additions & 0 deletions mediaflow_proxy/extractors/vixcloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import re
from typing import Dict, Any
from bs4 import BeautifulSoup, SoupStrainer
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
import json
from urllib.parse import urlparse, parse_qs


class VixCloudExtractor(BaseExtractor):
"""VixCloud URL extractor."""

async def version(self, domain: str) -> str:
    """Return the Inertia version string of the VixCloud parent site.

    Args:
        domain: TLD of the streamingcommunity domain (e.g. "to" for
            streamingcommunity.to).

    Returns:
        The "version" value embedded in the page's Inertia payload.

    Raises:
        ExtractorError: if the domain responds with a non-200 status, or
            the version cannot be located/parsed in the page payload.
    """
    base_url = f"https://streamingcommunity.{domain}/richiedi-un-titolo"
    response = await self._make_request(
        base_url,
        headers={
            "Referer": f"https://streamingcommunity.{domain}/",
            "Origin": f"https://streamingcommunity.{domain}",
        },
    )
    if response.status_code != 200:
        raise ExtractorError("Outdated Domain")
    # Parse only the #app container: its data-page attribute holds the
    # Inertia JSON payload that carries the site version.
    soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("div", {"id": "app"}))
    app_div = soup.find("div", {"id": "app"}) if soup else None
    if app_div is None:
        # Previously this fell through and returned None implicitly.
        raise ExtractorError("Failed to locate app container for version lookup")
    try:
        # TypeError covers a missing data-page attribute (json.loads(None)).
        return json.loads(app_div.get("data-page"))["version"]
    except (KeyError, TypeError, json.JSONDecodeError) as e:
        raise ExtractorError(f"Failed to parse version: {e}")

Comment on lines +12 to +31
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Improve error handling in 'version' method.
Currently, if the JSON structure or the "version" key is missing, the code will raise an unhandled exception. Consider adding a try-except block around the JSON parsing and key access. This ensures the method fails gracefully if the response format changes.

     soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("div", {"id": "app"}))
     if soup:
-        version = json.loads(soup.find("div", {"id": "app"}).get("data-page"))["version"]
-        return version
+        try:
+            data = json.loads(soup.find("div", {"id": "app"}).get("data-page"))
+            return data["version"]
+        except (KeyError, json.JSONDecodeError, AttributeError) as e:
+            raise ExtractorError(f"Failed to parse version: {e}")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
async def version(self, domain: str) -> str:
"""Get version of VixCloud Parent Site."""
DOMAIN = domain
base_url = f"https://streamingcommunity.{DOMAIN}/richiedi-un-titolo"
response = await self._make_request(
base_url,
headers={
"Referer": f"https://streamingcommunity.{DOMAIN}/",
"Origin": f"https://streamingcommunity.{DOMAIN}",
},
)
if response.status_code != 200:
raise ExtractorError("Outdated Domain")
# Soup the response
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("div", {"id": "app"}))
if soup:
# Extract version
version = json.loads(soup.find("div", {"id": "app"}).get("data-page"))["version"]
return version
async def version(self, domain: str) -> str:
"""Get version of VixCloud Parent Site."""
DOMAIN = domain
base_url = f"https://streamingcommunity.{DOMAIN}/richiedi-un-titolo"
response = await self._make_request(
base_url,
headers={
"Referer": f"https://streamingcommunity.{DOMAIN}/",
"Origin": f"https://streamingcommunity.{DOMAIN}",
},
)
if response.status_code != 200:
raise ExtractorError("Outdated Domain")
# Soup the response
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("div", {"id": "app"}))
if soup:
try:
data = json.loads(soup.find("div", {"id": "app"}).get("data-page"))
return data["version"]
except (KeyError, json.JSONDecodeError, AttributeError) as e:
raise ExtractorError(f"Failed to parse version: {e}")

async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
"""Extract Vixcloud URL."""
domain = url.split("://")[1].split("/")[0].split(".")[1]
version = await self.version(domain)
response = await self._make_request(url, headers={"x-inertia": "true", "x-inertia-version": version})
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("iframe"))
iframe = soup.find("iframe").get("src")
parsed_url = urlparse(iframe)
query_params = parse_qs(parsed_url.query)
response = await self._make_request(iframe, headers={"x-inertia": "true", "x-inertia-version": version})

if response.status_code != 200:
raise ExtractorError("Failed to extract URL components, Invalid Request")
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("body"))
if soup:
script = soup.find("body").find("script").text
token = re.search(r"'token':\s*'(\w+)'", script).group(1)
expires = re.search(r"'expires':\s*'(\d+)'", script).group(1)
quality = re.search(r'"quality":(\d+)', script).group(1)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Remove unused variables 'quality', 'canPlayFHD', and 'b'.
These variables are flagged by static analysis as unused. Removing them reduces clutter and improves maintainability.

-            quality = re.search(r'"quality":(\d+)', script).group(1)
...
-            canPlayFHD = "h=1"
...
-            b = "b=1"

Also applies to: 55-55, 58-58

🧰 Tools
🪛 Ruff (0.8.2)

50-50: Local variable quality is assigned to but never used

Remove assignment to unused variable quality

(F841)

Comment on lines +46 to +50
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Codebase verification

Based on the code review, I can now generate the final response:

Add error handling for regex matches to prevent NoneType errors

  • The regex matches for 'token', 'expires', and 'quality' in vixcloud.py are performed without any error handling
  • Since the class already inherits from BaseExtractor which has ExtractorError, use it to handle failed matches
  • Add checks before accessing .group(1) for all three regex patterns:
token_match = re.search(r"'token':\s*'(\w+)'", script)
if not token_match:
    raise ExtractorError("Failed to extract token from script")
token = token_match.group(1)
🔗 Analysis chain

Confirm presence of 'token', 'expires', and 'quality' in the script.
Where regex matching fails, you may encounter a NoneType error. Validate these matches before calling '.group(1)' to avoid run-time failures.


Let me verify the presence of error handling for these regex matches by searching through the codebase.

🏁 Scripts executed

The following scripts were executed for the analysis:

Error: No shell scripts found


Script:

#!/bin/bash
# Search for the file content to understand the context and error handling
cat mediaflow_proxy/extractors/vixcloud.py

Length of output: 3011

🧰 Tools
🪛 Ruff (0.8.2)

50-50: Local variable quality is assigned to but never used

Remove assignment to unused variable quality

(F841)

vixid = iframe.split("/embed/")[1].split("?")[0]
base_url = iframe.split("://")[1].split("/")[0]
final_url = f"https://{base_url}/playlist/{vixid}.m3u8?token={token}&expires={expires}"
if "canPlayFHD" in query_params:
canPlayFHD = "h=1"
final_url += "&h=1"
if "b" in query_params:
b = "b=1"
final_url += "&b=1"
self.base_headers["referer"] = url
return {
"destination_url": final_url,
"request_headers": self.base_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
}
2 changes: 1 addition & 1 deletion mediaflow_proxy/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class MPDSegmentParams(GenericParams):


class ExtractorURLParams(GenericParams):
host: Literal["Doodstream", "Mixdrop", "Uqload", "Streamtape", "Supervideo", "LiveTV"] = Field(
host: Literal["Doodstream", "Mixdrop", "Uqload", "Streamtape", "Supervideo", "VixCloud", "LiveTV"] = Field(
..., description="The host to extract the URL from."
)
destination: str = Field(..., description="The URL of the stream.", alias="d")
Expand Down
Loading