0

mods come

Comments:
Threaded Linear
#1
yukky

delete or lock this thread pls :)

https://www.vlr.gg/287605/vlr-stat-scraper

I've asked to have it locked on the Discord, but no one ever responds...

#2
Hades_Loves_Rb
0
Frags
+

can you show other people how to do it?

#3
yukky
0
Frags
+

I mean it's a web scraper, so they really just have to build one

here's some code to get upvotes/downvotes/net votes and the biggest/worst post

you'll have to change up the code to match your case but yeah

import scrapy
import requests
from spider.items import VlrItem

class UserPostsSpider(scrapy.Spider):
    """Crawl a vlr.gg user's post history and aggregate their vote ("frag") stats.

    Given a username, walks every page of the user's post listing, visits each
    discussion the user posted in, and accumulates total upvotes, downvotes and
    net votes plus the single biggest up/downvoted post into one VlrItem. The
    running item is yielded after each discussion is processed.
    """

    name = 'vlr'
    allowed_domains = ['vlr.gg']
    base_url = 'https://vlr.gg'

    def __init__(self, username=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.start_urls = [f'https://vlr.gg/user/{username}']
        self.username = username
        # post permalinks already tallied, so the same comment is never counted twice
        self.processed_urls = set()
        # FIX: upvote_count / downvote_count are incremented in parse_discussion
        # but were never initialized, raising KeyError the first time the user
        # turned out to be a thread's original poster. (VlrItem must declare
        # these fields — the original code already assigned to them.)
        self.user_item = VlrItem(
            upvotes=0,
            downvotes=0,
            netvotes=0,
            upvote_count=0,
            downvote_count=0,
            biggest_upvote=-1,
            biggest_downvote=0,
        )

    def parse(self, response):
        """Discover how many listing pages the user has and schedule them all."""
        page_links = response.css('a.btn.mod-page::attr(href)').getall()
        # the last pagination link carries the highest page number; a user with
        # a single page of posts has no pagination links at all
        last_page_number = int(page_links[-1].split('=')[-1]) if page_links else 1
        for page_number in range(1, last_page_number + 1):
            url = f'/user/{self.username}/?page={page_number}'
            yield response.follow(url, self.parse_user_page)

    def parse_user_page(self, response):
        """Follow every discussion linked from one page of the user's history."""
        discussion_links = response.css('div.wf-card.ge-text-light a::attr(href)').getall()
        for link in discussion_links:
            yield response.follow(link, self.parse_discussion)

    def parse_discussion(self, response):
        """Tally the user's votes in one discussion (original post + comments)."""
        # account for the thread's opening post when the user authored it
        original_post_upvotes, original_post_downvotes = self.user_is_poster(response)
        if (original_post_upvotes, original_post_downvotes) != (-1, -1):
            # NOTE(review): "continue thread" pages re-enter this method; if the
            # opening post is visible there too it would be counted again —
            # verify against the site's continuation-page markup.
            self._record_post(original_post_upvotes, original_post_downvotes)
            if original_post_upvotes > 0 and original_post_downvotes == 0:
                self.user_item['upvote_count'] += 1
            elif original_post_downvotes > 0 and original_post_upvotes == 0:
                # FIX: was `original_post_downvotes < 0`, which can never be
                # true once downvotes are returned as non-negative magnitudes
                self.user_item['downvote_count'] += 1

        # every comment in the thread authored by the target user
        user_posts = response.css(f'a.post-header-author[href*="/user/{self.username}"]')
        post_url_xpath = "./ancestor::div[contains(@class, 'wf-card post')]/div[contains(@class, 'post-footer')]/div[contains(@class, 'noselect')]/a[contains(@class, 'post-action link')]/@href"

        for post_author in user_posts:
            post_url = self.get_full_url(post_author, post_url_xpath, response)
            if post_url in self.processed_urls:
                continue  # already tallied (e.g. seen again via a continuation page)
            self.processed_urls.add(post_url)

            # extract this post's upvote and downvote counts from the frag widget
            upvote_count = post_author.xpath('./following-sibling::div[contains(@class,"post-frag-container")]/div[contains(@class,"positive")]/text()').get()
            downvote_count = post_author.xpath('./following-sibling::div[contains(@class,"post-frag-container")]/div[contains(@class,"negative")]/text()').get()

            # a missing frag element means zero votes of that polarity
            upvote_count = int(upvote_count) if upvote_count else 0
            downvote_count = int(downvote_count) if downvote_count else 0

            self._record_post(upvote_count, downvote_count)

        # yield the running totals; downstream consumers see a cumulative
        # snapshot after each discussion is processed
        yield self.user_item

        # deep threads are split across "continue thread" pages — follow them
        continue_links = response.css('a:contains("continue thread")::attr(href)').getall()
        for link in continue_links:
            yield response.follow(link, self.parse_discussion)

    def _record_post(self, upvotes, downvotes):
        """Fold one post's non-negative vote counts into the running totals.

        FIX: netvotes is now recomputed for original posts too; previously it
        was only updated inside the comment loop, so a thread where the user
        only authored the opening post left netvotes stale. Original posts now
        also participate in biggest-up/downvote tracking, consistent with
        comments.
        """
        self.user_item['upvotes'] += upvotes
        self.user_item['downvotes'] += downvotes
        self.user_item['netvotes'] = self.user_item['upvotes'] - self.user_item['downvotes']
        # track the single most up/downvoted post seen so far
        if upvotes > self.user_item['biggest_upvote']:
            self.user_item['biggest_upvote'] = upvotes
        if downvotes > self.user_item['biggest_downvote']:
            self.user_item['biggest_downvote'] = downvotes

    def get_full_url(self, post_author, post_url_xpath, response):
        """Resolve the permalink of the post containing *post_author* to an absolute URL."""
        post_url = post_author.xpath(post_url_xpath).get()
        return response.urljoin(post_url)

    def user_is_poster(self, response):
        """Return (upvotes, downvotes) of the thread's opening post if it was
        written by self.username, else the sentinel (-1, -1).

        Both counts are non-negative magnitudes. FIX: a negative thread frag
        count used to be returned as-is, which *decreased* the accumulated
        downvote total (and inflated netvotes) in parse_discussion.
        """
        original_post_author = response.xpath('//a[@id="1"]/following-sibling::div[contains(@class, "post-header")]/a[contains(@class, "post-header-author")]/text()').get()
        if original_post_author and original_post_author.strip() == self.username:
            # the thread-level frag count is a single signed integer
            count = response.xpath('//div[@id="thread-frag-count"]/text()').get()
            count = int(count.strip()) if count else 0
            if count > 0:
                return count, 0
            if count < 0:
                return 0, -count
            return 0, 0
        # the user is not the original poster
        return -1, -1

    def closed(self, reason):
        """Notify the companion web service that the scrape for this user finished."""
        # FIX: added a timeout so spider shutdown cannot hang forever on a
        # stalled notification request
        requests.post(
            'http://web:8000/update_scrapy_status',
            data={'task_id': self.username, 'is_completed': True},
            timeout=10,
        )
#4
cameran
0
Frags
+

noo dont we need you to save us

#5
yukky
0
Frags
+

lock in cameran take over

the basic code is right there

#6
cameran
0
Frags
+

i dont know what tf im looking at unfortunately, im just like the regular VLR user :(

  • Preview
  • Edit
› check that your post follows the forum rules and guidelines or get formatting help
Sign up or log in to post a comment