Skip to content

Infinitely scrolling feed

Target page: https://slensky.com/zendriver-examples/scrollable-cards.html

In this tutorial, we will demonstrate how to scrape a page with an infinitely scrolling feed. Before we get started, check out the live website to get an idea of what we will be working with!

Initial setup

Begin by creating a new script for the tutorial:

import asyncio

import zendriver as zd


async def main() -> None:
    """Open the demo page and print the card container's children immediately.

    No waiting is done before reading the children, so this version
    demonstrates what happens when the cards are read too early.
    """
    browser = await zd.start()
    try:
        page = await browser.get(
            "https://slensky.com/zendriver-examples/scrollable-cards.html",
        )

        # Not yet loaded, so empty
        card_container = await page.select("#card-container")
        cards = card_container.children
        print(cards)  # []
    finally:
        # Always shut the browser down, even if navigation or the selector
        # lookup above raised.
        await browser.stop()


# Run the async entry point only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())

In this first version of the code, we do not wait for the cards to load before trying to print them out, so the printed list will always be empty.

Waiting for cards to appear

To solve this, we need to wait for the cards to load before printing them:

import asyncio

import zendriver as zd
from zendriver import Element, Tab


async def wait_for_cards(
    page: Tab,
    initial_card_count: int,
    *,
    poll_interval: float = 0.5,
    timeout: float | None = None,
) -> list[Element]:
    """Poll ``#card-container`` until it has more children than before.

    Args:
        page: Tab to query for the ``#card-container`` element.
        initial_card_count: Number of cards already seen; the function
            returns once the container has strictly more children than this.
        poll_interval: Seconds to sleep between polls.
        timeout: Maximum number of seconds to keep polling. ``None`` (the
            default) polls indefinitely, matching the original behaviour.

    Returns:
        The container's children at the moment the count exceeded
        ``initial_card_count``.

    Raises:
        asyncio.TimeoutError: If ``timeout`` elapses before new cards appear.
    """
    loop = asyncio.get_running_loop()
    deadline = None if timeout is None else loop.time() + timeout

    while True:
        # Re-select on every iteration so a fresh view of the container
        # is inspected each time.
        card_container = await page.select("#card-container")
        cards = card_container.children
        if len(cards) > initial_card_count:
            return cards

        # Check the deadline only after a poll, so at least one attempt is
        # always made (even with timeout=0).
        if deadline is not None and loop.time() >= deadline:
            raise asyncio.TimeoutError(
                f"No new cards appeared within {timeout} seconds "
                f"(still {len(cards)} cards)"
            )
        await asyncio.sleep(poll_interval)


async def main() -> None:
    """Open the demo page, wait for the first cards, and print their text."""
    browser = await zd.start()
    try:
        page = await browser.get(
            "https://slensky.com/zendriver-examples/scrollable-cards.html",
        )

        # Wait for cards to load
        cards = await wait_for_cards(page, initial_card_count=0)

        # Now we can print the cards
        # (shows first 10 cards: Card 1, Card 2...Card 9, Card 10)
        for card in cards:
            print(card.text)
    finally:
        # Always shut the browser down, even if loading or waiting failed.
        await browser.stop()


# Run the async entry point only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())

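The above change was a step in the right direction, but it only reads the first batch of cards. What if we want to keep scrolling down until we find the lucky card, that is, the card whose text contains "Congratulations, you found the lucky card!"?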

Finding the lucky card

In this final version of the script, we continuously scroll down to the bottom of the page, waiting for new sets of cards to appear until we find the lucky card.

import asyncio

import zendriver as zd
from zendriver import Element, Tab


async def wait_for_cards(
    page: Tab,
    initial_card_count: int,
    *,
    poll_interval: float = 0.5,
    timeout: float | None = None,
) -> list[Element]:
    """Poll ``#card-container`` until it has more children than before.

    Prints the new card count once additional cards are detected.

    Args:
        page: Tab to query for the ``#card-container`` element.
        initial_card_count: Number of cards already seen; the function
            returns once the container has strictly more children than this.
        poll_interval: Seconds to sleep between polls.
        timeout: Maximum number of seconds to keep polling. ``None`` (the
            default) polls indefinitely, matching the original behaviour.

    Returns:
        The container's children at the moment the count exceeded
        ``initial_card_count``.

    Raises:
        asyncio.TimeoutError: If ``timeout`` elapses before new cards appear.
    """
    loop = asyncio.get_running_loop()
    deadline = None if timeout is None else loop.time() + timeout

    while True:
        # Re-select on every iteration so a fresh view of the container
        # is inspected each time.
        card_container = await page.select("#card-container")
        cards = card_container.children
        if len(cards) > initial_card_count:
            print("Loaded new cards. Current count:", len(cards))
            return cards

        # Check the deadline only after a poll, so at least one attempt is
        # always made (even with timeout=0).
        if deadline is not None and loop.time() >= deadline:
            raise asyncio.TimeoutError(
                f"No new cards appeared within {timeout} seconds "
                f"(still {len(cards)} cards)"
            )
        await asyncio.sleep(poll_interval)


def get_lucky_card(cards: list[Element]) -> Element | None:
    """Return the first card whose full text contains the lucky message.

    Returns ``None`` when no card in ``cards`` contains the message.
    """
    matches = (
        candidate
        for candidate in cards
        if "Congratulations, you found the lucky card!" in candidate.text_all
    )
    # next() with a default gives the first match, or None if there is none.
    return next(matches, None)


async def main() -> None:
    """Scroll the demo feed until the lucky card is found, then report it.

    Prints the 1-based position of the lucky card within the loaded cards.
    """
    browser = await zd.start()
    try:
        page = await browser.get(
            "https://slensky.com/zendriver-examples/scrollable-cards.html",
        )

        # Wait for the first batch of cards to load
        cards = await wait_for_cards(page, initial_card_count=0)

        # Loop until we find the lucky card
        while (lucky_card := get_lucky_card(cards)) is None:
            # Scroll to the bottom of the page
            await page.scroll_down(1000)  # 10x page height, likely to be enough

            # Get the new cards
            cards = await wait_for_cards(page, initial_card_count=len(cards))

        if lucky_card:
            print(f"Lucky card found: Card {cards.index(lucky_card) + 1}")
    finally:
        # Always shut the browser down, even if scrolling or waiting raised
        # or the run was interrupted.
        await browser.stop()


# Run the async entry point only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())