Infinitely scrolling feed
Target page: https://slensky.com/zendriver-examples/scrollable-cards.html
In this tutorial, we will demonstrate how to scrape a page with an infinitely scrolling feed. Before we get started, check out the live website to get an idea of what we will be working with!
Initial setup
Begin by creating a new script for the tutorial:
import asyncio
import zendriver as zd
async def main() -> None:
    """Open the demo page and print the card list before it has loaded.

    The cards are not yet loaded when the container is first queried, so
    the printed list is empty. The browser is stopped even if navigation
    or the element lookup raises.
    """
    browser = await zd.start()
    try:
        page = await browser.get(
            "https://slensky.com/zendriver-examples/scrollable-cards.html",
        )

        # Not yet loaded, so empty
        card_container = await page.select("#card-container")
        cards = card_container.children
        print(cards)  # []
    finally:
        # Always shut the browser down so a failed lookup does not leak it.
        await browser.stop()


if __name__ == "__main__":
    asyncio.run(main())
In this first version of the code, we do not wait for the cards to load before trying to print them out, so the printed list will always be empty.
Waiting for cards to appear
To solve this, we need to wait for the cards to load before printing them:
import asyncio
import zendriver as zd
from zendriver import Element, Tab
async def wait_for_cards(
    page: Tab,
    initial_card_count: int,
    timeout: float = 10.0,
) -> list[Element]:
    """Poll ``#card-container`` until it holds more cards than before.

    Args:
        page: Tab in which the card feed is displayed.
        initial_card_count: Number of cards already seen. The function
            returns as soon as the container has strictly more children.
        timeout: Maximum number of seconds to keep polling before giving up.

    Returns:
        Every child of the card container at the moment the count grew.

    Raises:
        asyncio.TimeoutError: If no additional cards appear within
            ``timeout`` seconds.
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    while True:
        # Look the container up again on each pass and read its children.
        card_container = await page.select("#card-container")
        cards = card_container.children
        if len(cards) > initial_card_count:
            return cards
        # Bound the wait so a feed that never grows cannot hang the script.
        if loop.time() >= deadline:
            raise asyncio.TimeoutError(
                f"no new cards appeared within {timeout} seconds"
            )
        await asyncio.sleep(0.5)
async def main() -> None:
    """Open the demo page, wait for the first cards, and print their text.

    The browser is stopped even if navigation or waiting for the cards
    raises.
    """
    browser = await zd.start()
    try:
        page = await browser.get(
            "https://slensky.com/zendriver-examples/scrollable-cards.html",
        )

        # Wait for cards to load
        cards = await wait_for_cards(page, initial_card_count=0)

        # Now we can print the cards
        # (shows first 10 cards: Card 1, Card 2...Card 9, Card 10)
        for card in cards:
            print(card.text)
    finally:
        # Always shut the browser down so a failed wait does not leak it.
        await browser.stop()


if __name__ == "__main__":
    asyncio.run(main())
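The above change was a step in the right direction, but it only ever sees the first batch of cards. What if we want to keep scrolling down until we find the lucky card, that is, the one card whose text contains "Congratulations, you found the lucky card!"?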
Finding the lucky card
In this final version of the script, we repeatedly scroll down to the bottom of the page and wait for the next set of cards to appear, until we find the lucky card.
import asyncio
import zendriver as zd
from zendriver import Element, Tab
async def wait_for_cards(
    page: Tab,
    initial_card_count: int,
    timeout: float = 10.0,
) -> list[Element]:
    """Poll ``#card-container`` until it holds more cards than before.

    Args:
        page: Tab in which the card feed is displayed.
        initial_card_count: Number of cards already seen. The function
            returns as soon as the container has strictly more children.
        timeout: Maximum number of seconds to keep polling before giving up.

    Returns:
        Every child of the card container at the moment the count grew.

    Raises:
        asyncio.TimeoutError: If no additional cards appear within
            ``timeout`` seconds.
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    while True:
        # Look the container up again on each pass and read its children.
        card_container = await page.select("#card-container")
        cards = card_container.children
        if len(cards) > initial_card_count:
            print("Loaded new cards. Current count:", len(cards))
            return cards
        # Bound the wait so a feed that stops growing cannot hang the script.
        if loop.time() >= deadline:
            raise asyncio.TimeoutError(
                f"no new cards appeared within {timeout} seconds"
            )
        await asyncio.sleep(0.5)
def get_lucky_card(cards: list[Element]) -> Element | None:
    """Return the first card announcing itself as the lucky card.

    Args:
        cards: Cards to search, in feed order.

    Returns:
        The first card whose ``text_all`` contains the congratulation
        message, or ``None`` when no card in ``cards`` matches.
    """
    return next(
        (
            card
            for card in cards
            if "Congratulations, you found the lucky card!" in card.text_all
        ),
        None,
    )
async def main() -> None:
    """Scroll the demo feed until the lucky card shows up, then report it.

    Loads the first batch of cards, then alternates between scrolling down
    and waiting for a new batch until one of the loaded cards is the lucky
    card. Its 1-based position in the feed is printed. The browser is
    stopped even if navigation, scrolling, or waiting raises.
    """
    browser = await zd.start()
    try:
        page = await browser.get(
            "https://slensky.com/zendriver-examples/scrollable-cards.html",
        )

        # Wait for the first batch of cards to load
        cards = await wait_for_cards(page, initial_card_count=0)

        # Loop until we find the lucky card
        while (lucky_card := get_lucky_card(cards)) is None:
            # Scroll to the bottom of the page
            await page.scroll_down(1000)  # 10x page height, likely to be enough

            # Get the new cards
            cards = await wait_for_cards(page, initial_card_count=len(cards))

        # The loop only exits once lucky_card is not None, so no extra
        # check is needed before reporting it.
        print(f"Lucky card found: Card {cards.index(lucky_card) + 1}")
    finally:
        # Always shut the browser down so a failed step does not leak it.
        await browser.stop()


if __name__ == "__main__":
    asyncio.run(main())