delete or lock this thread pls :)
https://www.vlr.gg/287605/vlr-stat-scraper
I've asked for it to be locked on the Discord, but no one ever responds...
delete or lock this thread pls :)
https://www.vlr.gg/287605/vlr-stat-scraper
I've asked for it to be locked on the Discord, but no one ever responds...
Hades_Loves_Rb [#2]can you show other people how to do it?
I mean, it's a web scraper, so they just really have to build one themselves.
Here's some code to get upvotes/downvotes/net votes and the biggest/worst post.
you'll have to change up the code to match your case but yeah
import scrapy
import requests
from spider.items import VlrItem
class UserPostsSpider(scrapy.Spider):
    """Scrapy spider that tallies a vlr.gg user's forum vote statistics.

    Crawls every page of ``username``'s post history, follows each
    discussion the user participated in, and accumulates upvote/downvote
    totals plus the single biggest upvoted/downvoted post into one
    shared ``VlrItem``.
    """

    name = 'vlr'
    allowed_domains = ['vlr.gg']
    base_url = 'https://vlr.gg'

    def __init__(self, username=None, *args, **kwargs):
        """Set up the start URL and zeroed vote counters for ``username``.

        :param username: vlr.gg username whose posts are tallied
            (passed on the scrapy command line via ``-a username=...``).
        """
        super().__init__(*args, **kwargs)
        self.start_urls = [f'https://vlr.gg/user/{username}']
        self.username = username
        # Post URLs already tallied, so the same post seen via multiple
        # crawled pages is only counted once.
        self.processed_urls = set()
        # FIX: parse_discussion increments 'upvote_count' and
        # 'downvote_count', but they were never initialized here, which
        # raised a KeyError the first time either branch ran. Initialize
        # them to 0 alongside the other counters.
        # NOTE(review): assumes VlrItem declares these two fields --
        # confirm against spider/items.py.
        self.user_item = VlrItem(
            upvotes=0,
            downvotes=0,
            netvotes=0,
            biggest_upvote=-1,
            biggest_downvote=0,
            upvote_count=0,
            downvote_count=0,
        )

    def parse(self, response):
        """Discover the page count of the user's history and crawl each page."""
        # Pagination buttons carry hrefs ending in '?page=N'; the last
        # one is the highest page number. No buttons means one page.
        page_links = response.css('a.btn.mod-page::attr(href)').getall()
        last_page_number = int(page_links[-1].split('=')[-1]) if page_links else 1
        for page_number in range(1, last_page_number + 1):
            url = f'/user/{self.username}/?page={page_number}'
            yield response.follow(url, self.parse_user_page)

    def parse_user_page(self, response):
        """Follow every discussion linked from one page of the user's history."""
        discussion_links = response.css('div.wf-card.ge-text-light a::attr(href)').getall()
        for link in discussion_links:
            # Each discussion page is parsed for the user's own posts.
            yield response.follow(link, self.parse_discussion)

    def parse_discussion(self, response):
        """Tally the user's votes on one discussion page and yield the item.

        Handles two cases: the user being the thread's original poster
        (thread-level frag count) and the user's individual comments
        (per-post frag containers).
        """
        # (-1, -1) signals "user is not the original poster"; any other
        # pair is (upvotes, downvotes) for the thread itself.
        original_post_upvotes, original_post_downvotes = self.user_is_poster(response)
        if original_post_upvotes != -1 and original_post_downvotes != -1:
            self.user_item['upvotes'] += original_post_upvotes
            self.user_item['downvotes'] += original_post_downvotes
            # NOTE(review): user_is_poster returns the downvote side as a
            # negative number (it passes the raw thread frag count through),
            # while the per-comment counts below come from separate
            # positive/negative divs -- the sign conventions look
            # inconsistent; verify against live page markup.
            if original_post_upvotes > 0 and original_post_downvotes == 0:
                self.user_item['upvote_count'] += 1
            elif original_post_downvotes < 0 and original_post_upvotes == 0:
                self.user_item['downvote_count'] += 1
        # Find the user's own comment(s) via the author anchor tag.
        user_posts = response.css(f'a.post-header-author[href*="/user/{self.username}"]')
        post_url_xpath = "./ancestor::div[contains(@class, 'wf-card post')]/div[contains(@class, 'post-footer')]/div[contains(@class, 'noselect')]/a[contains(@class, 'post-action link')]/@href"
        for post_author in user_posts:
            post_url = self.get_full_url(post_author, post_url_xpath, response)
            # Skip posts already counted from another page.
            if post_url in self.processed_urls:
                continue
            self.processed_urls.add(post_url)
            # Per-post up/down counts live in sibling frag-container divs;
            # a missing div means zero votes of that polarity.
            upvote_count = post_author.xpath('./following-sibling::div[contains(@class,"post-frag-container")]/div[contains(@class,"positive")]/text()').get()
            downvote_count = post_author.xpath('./following-sibling::div[contains(@class,"post-frag-container")]/div[contains(@class,"negative")]/text()').get()
            upvote_count = int(upvote_count) if upvote_count else 0
            downvote_count = int(downvote_count) if downvote_count else 0
            self.user_item['upvotes'] += upvote_count
            self.user_item['downvotes'] += downvote_count
            self.user_item['netvotes'] = self.user_item['upvotes'] - self.user_item['downvotes']
            # Track the single biggest upvoted / downvoted post.
            if upvote_count > self.user_item['biggest_upvote']:
                self.user_item['biggest_upvote'] = upvote_count
            # NOTE(review): if the "negative" div text is a negative
            # number, '>' never updates biggest_downvote past its initial
            # 0 -- confirm the div's sign convention before changing.
            if downvote_count > self.user_item['biggest_downvote']:
                self.user_item['biggest_downvote'] = downvote_count
            # NOTE(review): yields the one shared, still-mutating item
            # once per counted post; downstream pipelines see running
            # totals, not one final item. Presumably intentional here.
            yield self.user_item
        # Long threads paginate via "continue thread" links; follow them
        # so deep replies are counted too.
        continue_links = response.css('a:contains("continue thread")::attr(href)').getall()
        for link in continue_links:
            yield response.follow(link, self.parse_discussion)

    def get_full_url(self, post_author, post_url_xpath, response):
        """Resolve a post's permalink (relative href) to an absolute URL."""
        post_url = post_author.xpath(post_url_xpath).get()
        return response.urljoin(post_url)

    def user_is_poster(self, response):
        """Return the thread's (upvotes, downvotes) if ``self.username`` opened it.

        Returns ``(-1, -1)`` when the user is not the original poster.
        A positive thread frag count is returned as ``(count, 0)``, a
        negative one as ``(0, count)`` -- note the downvote side keeps
        its negative sign.
        """
        # The original post is anchored by <a id="1">; its author sits in
        # the adjacent post-header.
        original_post_author = response.xpath('//a[@id="1"]/following-sibling::div[contains(@class, "post-header")]/a[contains(@class, "post-header-author")]/text()').get()
        if original_post_author and original_post_author.strip() == self.username:
            count = response.xpath('//div[@id="thread-frag-count"]/text()').get()
            count = int(count.strip()) if count else 0
            if count > 0:
                return count, 0
            elif count < 0:
                return 0, count
            return 0, 0
        else:
            # The user is not the original poster.
            return -1, -1

    def closed(self, reason):
        """Spider-closed hook: tell the web service this scrape task finished."""
        requests.post('http://web:8000/update_scrapy_status', data={'task_id': self.username, 'is_completed': True})
cameran [#4]noo dont we need you to save us
lock in cameran take over
the basic code is right there
yukky [#5]lock in cameran take over
the basic code is right there
I don't know what I'm looking at, unfortunately — I'm just like the regular VLR user :(