'Scraped youtube comments amount and real amount are different

Im new to Python and Im trying to code a commentscraper for youtube with the most important informations, which I put in a JSON-file. But the my amount of comments and replys is not the same as on Youtube. I don't know, where my error is. I recognized, that it doesn't write any data in the files, if there are less than 20 comments, but I don't know, where I have to change something...
Example:

https://youtu.be/Re1m9O7q-9U here I get 102, but it should be 107

https://youtu.be/Q9Y5m1fQ7Fk here I get 423, but it should be 486

https://youtu.be/cMhE5BfmFkM here I get 1315, but it should be 2052

Here is the code:

class YT_Comments:
def __init__(self, api_key):
    self.api_key = api_key
    self.comment_int = 0


def get_video_comments(self, video_id, limit):
    url = f"https://youtube.googleapis.com/youtube/v3/commentThreads?part=replies%2C%20snippet&order=relevance&videoId={video_id}&key={self.api_key}"
    vid_comments = []
    pc, npt = self._get_comments_per_page(url)
    if limit is not None and isinstance(limit, int):
        url += f"&maxResults={str(limit)}"
    while (npt is not None):
        nexturl = url + "&pageToken=" + npt
        pc, npt = self._get_comments_per_page(nexturl)
        vid_comments.append(pc)
    print(self.comment_int)
    print(len(vid_comments))
    return vid_comments

def _get_comments_per_page(self, url):
    json_url = requests.get(url)
    data = json.loads(json_url.text)
    page_comments = []
    if "items" not in data:
        return page_comments, None
    item_data = data["items"]
    nextPageToken = data.get("nextPageToken", None)
    for item in tqdm.tqdm(item_data):
        try:
            kind = item["kind"]
            if kind == "youtube#comment" or "youtube#commentThread":
                comment_text = item["snippet"]["topLevelComment"]["snippet"]["textOriginal"]
                comment_author = item["snippet"]["topLevelComment"]["snippet"]["authorDisplayName"]
                author_id = item["snippet"]["topLevelComment"]["snippet"]["authorChannelId"]["value"]
                comment_like_count = item["snippet"]["topLevelComment"]["snippet"]["likeCount"]
                comment_date = item["snippet"]["topLevelComment"]["snippet"]["publishedAt"]
                comment = {"comment_text" : comment_text,
                           "comment_author" : comment_author,
                           "comment_author_id" : author_id,
                           "comment_like_count" : comment_like_count,
                           "comment_date" : comment_date}
                replies_l = []
                self.comment_int += 1
                try:
                    replies = item["replies"]["comments"]

                    for reply in replies:
                        reply_txt = reply["snippet"]["textOriginal"]
                        reply_author = reply["snippet"]["authorDisplayName"]
                        reply_author_id = reply["snippet"]["authorChannelId"]["value"]
                        reply_like_count = reply["snippet"]["likeCount"]
                        reply_date = reply["snippet"]["publishedAt"]
                        reply_dict = {"text" : reply_txt,
                                      "author" : reply_author,
                                      "author_id" : reply_author_id,
                                      "likes" : reply_like_count,
                                      "date" : reply_date}
                        replies_l.append(reply_dict)
                        self.comment_int +=1


                except KeyError:
                    replies_l.append(None)

                comment_dict = {
                    "comment": comment,
                    "replies": replies_l,
                }
                page_comments.append(comment_dict)

        except KeyError:
            print("No Comments")


    return page_comments, nextPageToken

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source

'Scraped youtube comments amount and real amount are different

Sources

Related Questions