你可能会说这个写的什么玩意;尝试压缩代码,怕太长不能发出来,只能弄到1900字。
通过代理抓取DLSite页面,然后修改文件名。代理默认是
socks5://localhost:10808, V2RayN 默认开启。
第三方库依赖:
- pip install lxml pysocks requests beautifulsoup4
|
顺便会调整"目录下只有一个文件夹"的目录结构,把里面的内容逐层上移,省去不必要的逐层打开目录。
例子:
[RJ123456] FolderA
- Subfolder1
  - Subfolder2
    - SubfolderN
      - OtherFolders
      - Files
      - MoreFiles
调整为:
[RJ123456] FolderA
- OtherFolders
- Files
- MoreFiles
我正在下载后知后觉dalao整理的2T音声。看上去挺多人在下载音声,我就把这个脚本发出来了。
复制代码 import os, re, random, shutil, string import requests from bs4 import BeautifulSoup
# Characters that Windows forbids in file names, mapped to their full-width
# look-alikes so the name stays readable. Built once at import time.
# NOTE(review): the published listing showed each character mapped to itself
# (and an unterminated '\' literal); the full-width targets are a
# reconstruction of the evident intent -- confirm against the author's copy.
_FULLWIDTH_REPLACEMENTS = str.maketrans({
    '<': '\uff1c',   # ＜
    '>': '\uff1e',   # ＞
    ':': '\uff1a',   # ：
    '"': '\uff02',   # ＂
    '/': '\uff0f',   # ／
    '\\': '\uff3c',  # ＼
    '|': '\uff5c',   # ｜
    '?': '\uff1f',   # ？
    '*': '\uff0a',   # ＊
})


def safe_filename(fname):
    """Return *fname* with file-name-reserved characters made safe.

    Each of ``< > : " / \\ | ? *`` is replaced by its full-width Unicode
    counterpart; every other character is left untouched.

    Args:
        fname: The proposed file or directory name (a single path component).

    Returns:
        The sanitized name as a new string.
    """
    # One C-level pass instead of nine chained str.replace() calls.
    return fname.translate(_FULLWIDTH_REPLACEMENTS)
def move_to_parent(fp):
    """Dissolve directory *fp* by moving its entries up one level.

    The directory is first renamed to a random 32-character name inside its
    parent, so an entry that has the same name as the directory itself can be
    moved into the parent without colliding. Every entry is then moved into
    the parent and the emptied temporary directory is removed.

    Args:
        fp: Path of the directory to dissolve (relative or absolute).

    Raises:
        OSError: If the rename or removal fails.
        shutil.Error: If an entry with the same name already exists in the
            parent directory.
    """
    fp = os.path.abspath(fp)
    parent = os.path.dirname(fp)
    temp_name = os.path.join(
        parent,
        ''.join(random.choices(string.ascii_uppercase + string.digits, k=32)),
    )
    os.rename(fp, temp_name)
    # List the renamed directory: the original path no longer exists after
    # the rename above, so listing `fp` here would raise FileNotFoundError.
    for f in os.listdir(temp_name):
        shutil.move(os.path.join(temp_name, f), parent)
    os.rmdir(temp_name)
# Walk every directory in the current working directory whose name carries a
# DLsite work id (RJ followed by digits), look the work up on DLsite through
# the local SOCKS5 proxy, flatten needless single-folder nesting, and rename
# the directory to "[maker][RJid][voice actors] title".
for fp in os.listdir():
    if not os.path.isdir(fp):
        continue

    # Accept the id either at the very start of the name or inside [brackets].
    rjid = re.match(r'^(RJ[0-9]+)', fp)
    if rjid is None:
        rjid = re.search(r'\[(RJ[0-9]+)\]', fp)
    if rjid is None:
        continue
    rjid = rjid[1]

    try:
        r = requests.get(
            f'https://www.dlsite.com/maniax/work/=/product_id/{rjid}.html',
            proxies={
                'https': 'socks5://localhost:10808'
            },
            headers={
                'Accept': 'text/html',
                'Accept-Language': 'ja',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
                'DNT': '1',
            },
            # Without a timeout a stalled proxy would hang the script forever.
            timeout=30,
        )
    except requests.RequestException:
        # Best effort, same as a non-2xx answer below: skip this folder.
        continue
    if not r.ok:
        continue

    doc = BeautifulSoup(r.text, 'lxml')
    maker_node = doc.select_one('#work_maker span.maker_name')
    title_node = doc.select_one('#work_name > a')
    if maker_node is None or title_node is None:
        # Page layout is not what we expect (e.g. removed work); leave the
        # folder untouched instead of crashing on `.text` of None.
        continue
    maker = maker_node.text.strip()
    title = title_node.text.strip()

    # Voice actors come from the outline table row whose header is '声優'.
    cv = []
    for row in doc.select('#work_outline > tr'):
        header = row.select_one('th')
        if header is None or header.text.strip() != '声優':
            continue
        cv = [a.text.strip() for a in row.select('td > a')]

    # Cap at five names to keep the directory name short.
    cv = '/'.join(cv[:5])
    if cv:
        fname = safe_filename(f'[{maker}][{rjid}][{cv}] {title}')
    else:
        fname = safe_filename(f'[{maker}][{rjid}] {title}')

    # While the folder holds exactly one entry and that entry is a directory,
    # pull its contents up one level. An empty folder has nothing to flatten;
    # testing for exactly one entry avoids indexing into an empty listing.
    while True:
        sub_entries = os.listdir(fp)
        if len(sub_entries) != 1:
            break
        sub = os.path.join(fp, sub_entries[0])
        if not os.path.isdir(sub):
            break
        move_to_parent(sub)

    if fp != fname:
        os.rename(fp, fname)
|