This is also my code, and we have the same problem.
# ai_firefox_scraper.py (Fixed)
import asyncio
import json
import os
import csv
import time
import random
from pathlib import Path

from playwright.async_api import async_playwright

SAVE_DIR = Path("scraped_data")
SAVE_DIR.mkdir(exist_ok=True)


class FirefoxSmartScraper:
    def __init__(self, max_pages=5, throttle=(4, 8)):
        self.max_pages = max_pages
        self.throttle = throttle

    async def search_and_scrape(self, topic: str):
        async with async_playwright() as p:
            browser = await p.firefox.launch(headless=False)
            context = await browser.new_context()
            page = await context.new_page()

            print(f"Searching DuckDuckGo for: {topic}")
            await page.goto("https://duckduckgo.com", timeout=30000)
            await page.wait_for_selector("input[name='q']")

            # Type like a human
            for c in topic:
                await page.type("input[name='q']", c, delay=random.randint(100, 200))
            await page.keyboard.press("Enter")

            await page.wait_for_selector("a.result__a", timeout=20000)
            await asyncio.sleep(random.uniform(*self.throttle))

            # Extract real links only
            items = await page.query_selector_all("a.result__a")
            urls = []
            for item in items[:self.max_pages]:
                try:
                    title = await item.inner_text()
                    href = await item.get_attribute("href")
                    # Ensure it's a valid URL
                    if href and href.startswith("http"):
                        urls.append({"title": title.strip(), "url": href})
                except Exception as e:
                    print(f"[!] Failed to parse link: {e}")
                    continue

            if not urls:
                print("No links found.")
                await browser.close()
                return

            print(f"Visiting {len(urls)} pages...")
            scraped = []
            for idx, link in enumerate(urls):
                print(f"\n[{idx + 1}] {link['title']}")
                try:
                    await page.goto(link["url"], timeout=30000)
                    await asyncio.sleep(random.uniform(*self.throttle))
                    content = await page.text_content("body")
                    scraped.append({
                        "title": link["title"],
                        "url": link["url"],
                        "content": content[:1500]  # Limit content
                    })
                except Exception as e:
                    print(f"[!] Failed to scrape: {link['url']}\nReason: {e}")
                    continue

            await browser.close()
            self.save_data(topic, scraped)

    def save_data(self, topic: str, data: list):
        filename_json = SAVE_DIR / f"{topic.replace(' ', '_')}_data.json"
        filename_csv = SAVE_DIR / f"{topic.replace(' ', '_')}_data.csv"

        # Save as JSON
        with open(filename_json, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

        # Save as CSV
        with open(filename_csv, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=["title", "url", "content"])
            writer.writeheader()
            for entry in data:
                writer.writerow(entry)

        print(f"\nSaved {len(data)} entries to:\n- {filename_json}\n- {filename_csv}")


def main():
    topic = input("Enter topic to crawl the web for data: ").strip()
    if not topic:
        print("No topic entered.")
        return

    scraper = FirefoxSmartScraper()
    asyncio.run(scraper.search_and_scrape(topic))


if __name__ == "__main__":
    main()
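To figure out where it stalls (I suspect the wait_for_selector on "a.result__a"), here is a small separate check I have been running. It is only a sketch: the query string and the ddg_debug.png path are placeholders I made up. It waits on the same selector the scraper uses, and if that times out it saves a screenshot and prints the start of the served HTML, so you can see whether DuckDuckGo returned a challenge page or simply different markup.

import asyncio

from playwright.async_api import TimeoutError as PlaywrightTimeout, async_playwright


async def debug_duckduckgo(query: str):
    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=False)
        page = await browser.new_page()
        await page.goto(f"https://duckduckgo.com/?q={query.replace(' ', '+')}", timeout=30000)
        try:
            # Wait on the same selector the scraper relies on
            await page.wait_for_selector("a.result__a", timeout=10000)
            print("Selector matched -- link extraction should work.")
        except PlaywrightTimeout:
            # The selector never appeared: capture what was actually served
            await page.screenshot(path="ddg_debug.png", full_page=True)
            html = await page.content()
            print("'a.result__a' not found. Start of the served HTML:")
            print(html[:500])
        await browser.close()


asyncio.run(debug_duckduckgo("web scraping with playwright"))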
So that's my attempt at a general-purpose web scraper. I don't know what's wrong; it doesn't fetch any data from the internet, or maybe the websites really are that protected.
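One other possibility I'm wondering about: as far as I can tell, "a.result__a" is the link class used by DuckDuckGo's HTML-only interface (html.duckduckgo.com/html), not by the JavaScript site at duckduckgo.com, so wait_for_selector may simply time out before any links are collected. Below is a minimal sketch of that workaround, assuming the HTML endpoint still uses those classes (worth re-checking in the browser devtools, since search pages change their markup). The function name and the example query are just placeholders.

import asyncio
from urllib.parse import parse_qs, quote_plus, urlparse

from playwright.async_api import async_playwright


async def search_html_duckduckgo(topic: str, max_pages: int = 5):
    """Query DuckDuckGo's HTML-only endpoint, where a.result__a should exist."""
    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=False)
        page = await browser.new_page()
        # The HTML-only endpoint renders results without JavaScript
        await page.goto(f"https://html.duckduckgo.com/html/?q={quote_plus(topic)}", timeout=30000)
        await page.wait_for_selector("a.result__a", timeout=20000)

        items = await page.query_selector_all("a.result__a")
        urls = []
        for item in items[:max_pages]:
            title = await item.inner_text()
            href = await item.get_attribute("href")
            if not href:
                continue
            # Results here are often redirect links like //duckduckgo.com/l/?uddg=<target>;
            # unwrap the uddg parameter if it is present
            if "uddg=" in href:
                href = parse_qs(urlparse(href).query).get("uddg", [href])[0]
            if href.startswith("http"):
                urls.append({"title": title.strip(), "url": href})

        await browser.close()
        return urls


print(asyncio.run(search_html_duckduckgo("web scraping with playwright")))

If this returns links, the rest of the scraper (visiting each URL and saving JSON/CSV) should work unchanged; only the search step would need to point at the HTML endpoint.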