// Scraping Reddit Programmer Humor with Node.js using Puppeteer
/*
    This code comes from Vincent Lab
    And it has a video version linked here: https://www.youtube.com/watch?v=Zb--639XePw
*/
// Import dependencies
const puppeteer = require("puppeteer");
const fs = require("fs");
/**
 * Scrapes r/ProgrammerHumor: scrolls the listing a fixed number of times,
 * collects the post elements found on each pass, extracts each post's title
 * and image URL, and writes the de-duplicated result to jokes.json.
 *
 * The browser is always closed, even when scraping fails, and any failure is
 * reported on stderr with a non-zero exit code.
 */
(async () => {
    // Number of scroll passes, not posts (the original note estimates that
    // 25 passes yield about 100 posts)
    const amount = 25;
    // The location / URL
    const url = "https://www.reddit.com/r/ProgrammerHumor/";
    // Jokes keyed by title. Every pass re-queries all matching posts under the
    // root, so the same post can be collected repeatedly; keying by title
    // collapses the repeats. A Map (rather than a plain object) keeps scraped
    // titles such as "__proto__" from colliding with Object.prototype.
    const jokesByTitle = new Map();
    console.log("Getting posts from Reddit");
    // Create the browser
    const browser = await puppeteer.launch({
        headless: false
    });
    try {
        // Navigate to the website
        const page = await browser.newPage();
        await page.goto(url, { waitUntil: "load" });
        // Get the root element of all the posts
        const root = await page.$(".rpBJOHq2PR60pnwJlUyP0");
        if (root === null) {
            // Fail with a clear message instead of a TypeError on `undefined`
            throw new Error(`Could not find the post list container on ${url}`);
        }
        // All the post element handles gathered across the scroll passes
        const posts = [];
        for (let i = 0; i < amount; i++) {
            // Get all the posts currently under the root
            const chunk = await root.$$("._1poyrkZ7g36PawDueRza-J");
            posts.push(...chunk);
            // Wait for 1 second
            await sleep(1000);
            // Scroll to the next chunk
            await page.evaluate(() => {
                window.scrollBy(0, (632 * 12));
            });
        }
        console.log("Extracting jokes from posts");
        let skipped = 0;
        let lastError = null;
        for (const post of posts) {
            try {
                // Get the title
                const title = await getProperty(post, "textContent", "_eYtD2XCVieq6emjKBH3m");
                // Get the image URL
                const image = await getProperty(post, "src", "ImageBox-image");
                jokesByTitle.set(title, { image });
            } catch (error) {
                // Best effort: a post whose title or image cannot be read is
                // skipped, but counted so the failure is not invisible.
                skipped++;
                lastError = error;
            }
        }
        if (skipped > 0) {
            console.log(`Skipped ${skipped} post elements that could not be read (last error: ${lastError.message})`);
        }
        console.log("Converting jokes into an array");
        // Convert the jokes map into an array of { title, image } records
        const jokes = Array.from(jokesByTitle, ([title, { image }]) => ({ title, image }));
        console.log("Saving jokes");
        // Save the jokes to a file
        fs.writeFileSync("jokes.json", JSON.stringify(jokes));
    } finally {
        // Close the browser whether or not scraping succeeded
        await browser.close();
    }
})().catch((error) => {
    // Do not leave the top-level promise floating: report and signal failure
    console.error(error);
    process.exitCode = 1;
});
/**
 * Reads a property from the first descendant of `rootElement` that carries
 * the given CSS class.
 *
 * @param {object} rootElement - Handle exposing an async `$(selector)` lookup
 *     (presumably a Puppeteer ElementHandle; verify against callers).
 * @param {string} property - Name of the property to read, e.g. "textContent".
 * @param {string} className - CSS class name, without the leading dot.
 * @returns {Promise<*>} The property's value as returned by `jsonValue()`.
 * @throws {Error} If no descendant with that class exists.
 */
async function getProperty(rootElement, property, className) {
    // `$` resolves to the first match or a falsy value when nothing matches
    const element = await rootElement.$(`.${className}`);
    if (!element) {
        // Explicit error instead of a TypeError from dereferencing `undefined`
        throw new Error(`No element with class "${className}" found`);
    }
    const handle = await element.getProperty(property);
    return handle.jsonValue();
}
/**
 * Pauses an async flow for the given duration.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<undefined>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
