Mirror of https://github.com/saurabhan/Wallhaven-dl.git (synced 2026-01-03 08:28:01 +00:00)
Updates to work with Wallhaven Post Alpha
Wallhaven recently came out of alpha, making many changes to their website. I've updated the scraper to make it work again by rewriting part of the script to use the new API.

Currently the only way to get NSFW images in search results is through API access with an API key, so the updated scraper now uses the API to load the search results, categories, and latest pages. Users don't require API access to view full-resolution NSFW images, so the images themselves are still fetched through the original method; the difference is that we now have the actual image path, so we no longer need to guess the file extension.

The API requires an API key, which you can get by creating a Wallhaven account; the key is under User Menu -> Settings -> Account. Instead of the latest page, the script now shows the toplist for the past month, as the latest page can't be accessed through the API. The API is limited to 45 calls per minute, but since we only call the API to load new pages (of 24 images each) and not to download the actual images, this limit should never be reached.
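For reference, here is a minimal sketch of the API flow the updated script relies on. The endpoint, the categories/purity/page parameters, and the data/path response fields all appear in the diff below; the variable names and example parameter values are illustrative, and the key value is a placeholder you must replace with your own.

    import os
    import requests

    APIKEY = 'EnterYourAPIKeyHere'  # placeholder; get yours under Settings -> Account

    # One API call loads one page of results (24 wallpapers).
    resp = requests.get('https://wallhaven.cc/api/v1/search',
                        params={'apikey': APIKEY, 'categories': '111',
                                'purity': '001', 'page': 1})
    for wallpaper in resp.json()['data']:
        # 'path' is the direct URL of the full-resolution image, so the
        # file extension no longer has to be guessed.
        print(os.path.basename(wallpaper['path']))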
@@ -4,29 +4,25 @@
 # #
 # Author - Saurabh Bhan #
 # #
-# dated- 26 June 2016 #
-# Update - 29 June 2016 #
+# Dated- 26 June 2016 #
+# Update - 11 June 2019 #
 ########################################################
 
 import os
 import getpass
-import bs4
 import re
 import requests
 import tqdm
 import time
 import urllib
+import json
 
 os.makedirs('Wallhaven', exist_ok=True)
 BASEURL=""
 cookies=dict()
 
-def login():
-    global cookies
-    print('NSFW images require login')
-    username = input('Enter username: ')
-    password = getpass.getpass('Enter password: ')
-    cookies = requests.post('https://alpha.wallhaven.cc/auth/login', data={'username':username, 'password':password}).cookies
+global APIKEY
+APIKEY = "EnterYourAPIKeyHere"
 
 def category():
     global BASEURL
@@ -63,51 +59,46 @@ def category():
     ptags = {'sfw':'100', 'sketchy':'010', 'nsfw':'001', 'ws':'110', 'wn':'101', 'sn':'011', 'all':'111'}
     ptag = ptags[pcode]
 
-    if pcode in ['nsfw', 'wn', 'sn', 'all']:
-        login()
+    BASEURL = 'https://wallhaven.cc/api/v1/search?apikey=' + APIKEY + "&categories=" +\
+        ctag + '&purity=' + ptag + '&apikey=zsUUdO08GzEyf8q7wrVqY3hNKrwKWFSm' + '&page='
 
-    BASEURL = 'https://alpha.wallhaven.cc/search?categories=' + \
-        ctag + '&purity=' + ptag + '&page='
 
 def latest():
     global BASEURL
     print('Downloading latest')
-    BASEURL = 'https://alpha.wallhaven.cc/latest?page='
+    topListRange = '1M'
+    BASEURL = 'https://wallhaven.cc/api/v1/search?apikey=' + APIKEY + '&topRange=' +\
+        topListRange + '&sorting=toplist&page='
 
 def search():
     global BASEURL
     query = input('Enter search query: ')
-    BASEURL = 'https://alpha.wallhaven.cc/search?q=' + \
+    BASEURL = 'https://wallhaven.cc/api/v1/search?apikey=' + APIKEY + '&q=' + \
         urllib.parse.quote_plus(query) + '&page='
 
 def downloadPage(pageId, totalImage):
     url = BASEURL + str(pageId)
     urlreq = requests.get(url, cookies=cookies)
-    soup = bs4.BeautifulSoup(urlreq.text, 'lxml')
-    soupid = soup.findAll('a', {'class': 'preview'})
-    res = re.compile(r'\d+')
-    imgId = res.findall(str(soupid))
-    imgext = ['jpg', 'png', 'bmp']
-    for imgIt in range(len(imgId)):
-        currentImage = (((pageId - 1) * 24) + (imgIt + 1))
-        filename = 'wallhaven-%s.' % imgId[imgIt]
-        url = 'https://wallpapers.wallhaven.cc/wallpapers/full/%s' % filename
-        for ext in imgext:
-            iurl = url + ext
-            osPath = os.path.join('Wallhaven', filename)
-            if not os.path.exists(osPath):
-                imgreq = requests.get(iurl, cookies=cookies)
-                if imgreq.status_code == 200:
-                    print("Downloading : %s - %s / %s" % (filename, currentImage , totalImage))
-                    with open(osPath, 'ab') as imageFile:
-                        for chunk in imgreq.iter_content(1024):
-                            imageFile.write(chunk)
-                    break
-                elif (imgreq.status_code != 403 and imgreq.status_code != 404):
-                    print("Unable to download %s - %s / %s" % (filename, currentImage , totalImage))
-            else:
-                print("%s already exist - %s / %s" % (filename, currentImage , totalImage))
-                break
+    pagesImages = json.loads(urlreq.content);
+    pageData = pagesImages["data"]
+    for i in range(len(pageData)):
+        currentImage = (((pageId - 1) * 24) + (i + 1))
+        url = pageData[i]["path"]
+        filename = os.path.basename(url)
+        osPath = os.path.join('Wallhaven', filename)
+        if not os.path.exists(osPath):
+            imgreq = requests.get(url, cookies=cookies)
+            if imgreq.status_code == 200:
+                print("Downloading : %s - %s / %s" % (filename, currentImage , totalImage))
+                with open(osPath, 'ab') as imageFile:
+                    for chunk in imgreq.iter_content(1024):
+                        imageFile.write(chunk)
+            elif (imgreq.status_code != 403 and imgreq.status_code != 404):
+                print("Unable to download %s - %s / %s" % (filename, currentImage , totalImage))
+        else:
+            print("%s already exist - %s / %s" % (filename, currentImage , totalImage))
 
 def main():
     Choice = input('''Choose how you want to download the image:
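As a usage note on the rate limit mentioned above: each API call yields a page of 24 images, and the image downloads themselves do not count against the quota, so 45 calls per minute is hard to hit in practice. If you still want a guarantee, a pacing wrapper is straightforward. A minimal sketch, assuming the downloadPage function from this commit; the pacedDownload helper itself is hypothetical and not part of the commit:

    import time

    API_CALLS_PER_MINUTE = 45  # Wallhaven's documented API limit

    def pacedDownload(totalPages, totalImage):
        # Space page requests at least 60/45 seconds apart so the limit
        # cannot be exceeded even when every image is already on disk
        # and downloadPage returns almost immediately.
        minInterval = 60.0 / API_CALLS_PER_MINUTE
        for pageId in range(1, totalPages + 1):
            start = time.time()
            downloadPage(pageId, totalImage)
            elapsed = time.time() - start
            if elapsed < minInterval:
                time.sleep(minInterval - elapsed)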