def fetch_url(url: str) -> requests.Response:
    """Perform a GET request with sane defaults.

    The request carries the module-level ``USER_AGENT`` header, follows
    redirects, reads the body eagerly (``stream=False``) and passes
    ``TIMEOUT_SECONDS`` as the request timeout.

    Args:
        url: Address to fetch.

    Returns:
        The response object produced by ``requests.get``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (raised by ``raise_for_status``).
    """
    headers = {"User-Agent": USER_AGENT}
    resp = requests.get(
        url,
        headers=headers,
        timeout=TIMEOUT_SECONDS,
        allow_redirects=True,
        stream=False,
    )
    resp.raise_for_status()  # raise HTTPError for 4xx/5xx
    return resp
# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

# Usage:
#     python quick_site_overview.py https://xnexx.hot
#
# The output will look something like: xnexx hot
report = "url": url, "final_url": resp.url, "status_code": resp.status_code, "title": title, "meta_description": meta_desc, "og_title": og_title, "og_description": og_desc, "keywords": keywords, "is_adult_content": adult_flag, "content_length_bytes": len(resp.content), def fetch_url(url: str) -> requests
TIMEOUT_SECONDS = 8 MAX_REDIRECTS = 5 USER_AGENT = ( "Mozilla/5.0 (compatible; QuickSiteOverview/1.0; +https://example.com/bot)" ) def fetch_url(url: str) ->
def is_adult_content(text: str) -> bool:
    """Report whether *text* mentions any entry of ``ADULT_KEYWORDS``.

    This is a deliberately naive check: the text is lower-cased once and each
    keyword is searched for as a plain substring anywhere inside it.  Keywords
    are compared as-is, so they are presumably stored lower-case -- verify
    against the ``ADULT_KEYWORDS`` definition.
    """
    lowered = text.lower()
    for keyword in ADULT_KEYWORDS:
        if keyword in lowered:
            return True
    return False
raw_url = sys.argv[1]

# Ensure we have a scheme – requests needs it.  The "://" test backs up the
# parsed.scheme check because urlparse() can read a bare "host:port" argument
# (e.g. "localhost:8000") as scheme "localhost", which would otherwise skip
# the prefix and hand requests a URL it cannot use.
parsed = urlparse(raw_url)
if not parsed.scheme or "://" not in raw_url:
    raw_url = "https://" + raw_url