下载樱花动漫上的动漫
介绍
现在,樱花动漫上广告(色情)非常多,借助下面的代码就可以让我们在命令行中下载动漫,然后在本地进行观看。
下载的视频文件默认保存在代码所在目录下的 `animations/animation_name/episode_number.mp4`,其中 `animation_name` 是动漫的全名,`episode_number` 是对应的集数。
代码
代码本身并不复杂,就不写详细的注释了。不过最好对照樱花动漫网站的页面源码一起阅读,否则很难看懂其中的解析逻辑——话说真的有人会仔细看嘛?(笑)
from urllib.parse import quote
from bs4 import BeautifulSoup
import requests
import re
import json
import os
# Value passed as `timeout=` to every requests.get call in this script (seconds).
REQUEST_TIMEOUT = 20
# Chunk size in bytes (1 MiB) passed to iter_content while streaming a download.
DOWNLOAD_CHUNK_SIZE = 1024 * 1024
class Animation(object):
    """One search result: its display title and its site-internal number.

    Attributes:
        title: The ``title`` entry of the anchor the object was built from.
        number: The anchor's ``href`` with ``/view/`` and ``.html`` removed.
    """

    def __init__(self, a):
        """Build the entry from an anchor-like object.

        Args:
            a: Anything supporting ``a['title']`` and ``a['href']`` lookups
                (in this script, the ``<a>`` tag of a search result).
        """
        title, href = a['title'], a['href']
        # Strip the URL decoration so only the identifier part is left.
        for fragment in ('/view/', '.html'):
            href = href.replace(fragment, '')
        self.title = title
        self.number = href
def getAnimationList(searchRequest):
    """Parse the search-result page into a list of ``Animation`` objects.

    Args:
        searchRequest: Response of the search request. Only its ``content``
            bytes are read; they are decoded as GBK.

    Returns:
        A list with one ``Animation`` per ``<li>`` found in the ``<ul>`` of
        the first ``div.pics`` element.

    Raises:
        RuntimeError: If the page does not contain exactly two ``div.pics``
            elements, which this script treats as "not a result page".
        UnicodeDecodeError: If the body is not valid GBK.
    """
    # Hand BeautifulSoup the decoded text itself. Re-encoding it to UTF-8
    # bytes would make BeautifulSoup run its own encoding detection again
    # (which may honour a charset declared inside the page) on bytes that
    # are no longer in that charset.
    html = searchRequest.content.decode("gbk")
    soup = BeautifulSoup(html, 'html.parser')
    divs = soup.find_all('div', attrs={'class': 'pics'})
    # Explicit raise instead of assert: asserts vanish under ``python -O``.
    if len(divs) != 2:
        raise RuntimeError('无法获取动漫列表!')
    return [Animation(li.h2.a) for li in divs[0].ul.find_all('li')]
def selectAnimation(animationList):
    """Print a numbered menu of animations and return the one the user picks.

    The prompt is repeated until the user enters an integer between 1 and
    ``len(animationList)``.

    Args:
        animationList: Sequence of objects exposing a ``title`` attribute.

    Returns:
        The element of ``animationList`` selected by the user.

    Raises:
        ValueError: If ``animationList`` is empty.
        EOFError: If standard input is closed while waiting for a choice.
    """
    animationCount = len(animationList)
    if animationCount == 0:
        raise ValueError("未找到动漫")
    for position, animation in enumerate(animationList, start=1):
        print(f"{position}.{animation.title}")
    while True:
        try:
            index = int(input('选择一个: ')) - 1
        except ValueError:
            # Only a non-numeric answer is retried. A bare ``except`` here
            # would also swallow Ctrl-C and EOF and trap the user in the loop.
            continue
        if 0 <= index < animationCount:
            return animationList[index]
def getEpisodeList(animation):
    """Download and parse the episode list of ``animation``.

    Two requests are made: the player page, from which the address of the
    ``/playdata/...js`` script is extracted, and then that script, whose
    ``VideoListJson`` assignment is converted to JSON and parsed.

    Args:
        animation: Object whose ``number`` attribute identifies the show.

    Returns:
        ``VideoListJson[0][1]`` from the playdata script. The caller in this
        script treats each item as a ``'$'``-separated string whose second
        field is the video URL.

    Raises:
        RuntimeError: If the playdata address cannot be found on the player
            page, the playdata request fails, or its content cannot be
            parsed into the expected structure.
    """
    r = requests.get(f'http://www.imomoe.in/player/{animation.number}-0-0.html', timeout=REQUEST_TIMEOUT)
    match = re.search(r'"(/playdata/(\d+)/(\d+)(\.)js(\?)(\d+)(\.)(\d+))"', r.content.decode("gbk"))
    # Explicit raises instead of asserts: asserts vanish under ``python -O``.
    if not match:
        raise RuntimeError('无法获取集数列表地址!')
    r = requests.get('http://www.imomoe.in' + match.group(1), timeout=REQUEST_TIMEOUT)
    if not r.ok:
        raise RuntimeError('无法获取集数列表!')
    # Reduce the script to the bare array literal: drop the variable
    # assignment and the trailing ``urlinfo=...`` statement.
    script = r.content.decode("gbk").replace('var VideoListJson=', '')
    script = re.sub(r",urlinfo='http://'\+document.domain\+'/player/(\d+)-<from>-<pos>.html';", "", script)
    js = script.replace("'", '"')  # single -> double quotes to make it valid JSON
    try:
        return json.loads(js)[0][1]
    except (ValueError, IndexError, TypeError) as err:
        # ValueError covers json.JSONDecodeError; the others cover an
        # unexpectedly shaped payload.
        raise RuntimeError('无法获取集数列表!') from err
def selectEpisodes(episodeList):
    """Ask the user which episodes to download.

    The prompt is repeated until the answer is either ``all`` (any case) or
    a whitespace-separated list of integers. The numbers are not
    range-checked here.

    Args:
        episodeList: Sequence of available episodes; only its length is used.

    Returns:
        ``range(len(episodeList))`` for ``all``, otherwise a list of
        zero-based indices (each entered number minus one).

    Raises:
        ValueError: If ``episodeList`` is empty.
        EOFError: If standard input is closed while waiting for an answer.
    """
    episodeCount = len(episodeList)
    if episodeCount == 0:
        raise ValueError("未找到任何一集")
    print(f'共{episodeCount}集')
    while True:
        inputText = input('选择下载集数(多个使用单个空格分隔,输入"all"下载全部):')
        if inputText.strip().lower() == 'all':
            return range(episodeCount)
        # split() without an argument tolerates repeated/leading/trailing
        # whitespace, which split(' ') turned into empty, unparsable tokens.
        tokens = inputText.split()
        if not tokens:
            continue
        try:
            return [int(token) - 1 for token in tokens]
        except ValueError:
            # Only malformed numbers are retried. A bare ``except`` here
            # would also swallow Ctrl-C and EOF and trap the user in the loop.
            continue
def processBar(percent, startText='', endText='100%', totalLength=30):
    """Redraw a one-line text progress bar on standard output.

    The line starts with a carriage return so that successive calls
    overwrite each other, and no newline is written.

    Args:
        percent: Completed fraction, where ``1.0`` means finished.
        startText: Text printed before the bar.
        endText: Text printed after the percentage.
        totalLength: Width of the bar area in characters.
    """
    filled = "#" * int(percent * totalLength)
    padded = filled.ljust(totalLength)
    label = ' {:0>4.1f}%|'.format(percent * 100)
    line = ''.join(('\r', startText, padded, label, endText))
    print(line, end='', flush=True)
def downloadVideo(url, path):
    """Stream the resource at ``url`` into the file at ``path``.

    Progress is reported through ``processBar`` when the server announces
    the size; otherwise the number of bytes received so far is printed.

    Args:
        url: Address of the video file.
        path: Destination file; it is created or overwritten.

    Raises:
        RuntimeError: If the server answers with an error status. The
            message is the HTTP reason phrase.
        requests.RequestException: On connection problems or timeouts.
        OSError: If ``path`` cannot be written.
    """
    # The context manager releases the connection even when writing fails.
    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as r:
        if not r.ok:
            raise RuntimeError(r.reason)
        # Read the size from the header. Touching ``r.content`` would pull
        # the whole body into memory before the first byte is written,
        # defeating ``stream=True`` and making the progress bar meaningless.
        try:
            contentLength = int(r.headers.get('Content-Length', 0))
        except ValueError:
            contentLength = 0
        sizeKnown = contentLength > 0
        print(f"文件大小:{contentLength if sizeKnown else '未知'}")
        downloadedLength = 0
        with open(path, "wb") as mp4:
            for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                if not chunk:
                    continue
                mp4.write(chunk)
                downloadedLength += len(chunk)
                if sizeKnown:
                    # Clamp: with a compressed transfer the decoded bytes can
                    # exceed the advertised Content-Length.
                    processBar(min(downloadedLength / contentLength, 1.0))
                else:
                    print(f'\r已下载:{downloadedLength}', end='', flush=True)
    # Finish the progress line.
    print()
if __name__ == '__main__':
    # Interactive flow: search -> choose a title -> choose episodes -> download.
    searchWord = quote(input('搜索: ').encode('gb2312'))
    searchRequest = requests.get('http://www.imomoe.in/search.asp?searchword=' + searchWord, timeout=REQUEST_TIMEOUT)
    animationList = getAnimationList(searchRequest)
    animation = selectAnimation(animationList)
    episodeList = getEpisodeList(animation)
    episodeIndices = selectEpisodes(episodeList)

    # The title comes from the website and may contain characters that are
    # path separators or are not allowed in file names; replace them so the
    # download directory is always a single, creatable folder.
    safeTitle = re.sub(r'[\\/:*?"<>|]', '_', animation.title).strip(' .') or 'untitled'
    directory = os.path.join(os.path.dirname(__file__), 'animations', safeTitle)
    os.makedirs(directory, exist_ok=True)
    print()

    for i in episodeIndices:
        print(f'开始下载第{i + 1}集')
        try:
            if i >= len(episodeList) or i < 0:
                raise ValueError(f'集数超出范围(1-{len(episodeList)})')
            # Each entry is '$'-separated; the second field is the video URL.
            downloadVideo(episodeList[i].split('$')[1], os.path.join(directory, f'{i + 1}.mp4'))
            print('下载成功')
        except Exception as e:
            # Best effort: report the failure and carry on with the next episode.
            print(e)
            print('下载失败')
        print()

    # Keep the console window open when started by double-click on Windows;
    # the ``pause`` command does not exist on other systems.
    if os.name == 'nt':
        os.system('pause')