复制代码- import os, re, random, shutil, string
- import requests
- from bs4 import BeautifulSoup
def safe_filename(fname):
    """Return *fname* with filename-reserved characters made safe.

    Each of the characters ``< > : " / \\ | ? *`` (which Windows forbids in
    file names) is replaced by its full-width Unicode look-alike, so the
    result stays readable but is a legal file or directory name.

    Args:
        fname: The proposed file name (not a path -- separators are replaced).

    Returns:
        The sanitised name; all other characters are left untouched.
    """
    # Full-width forms (U+FF01..U+FF5E) mirror printable ASCII at a fixed
    # offset of 0xFEE0, e.g. '/' (U+002F) -> U+FF0F.
    table = {ord(ch): ord(ch) + 0xFEE0 for ch in '<>:"/\\|?*'}
    return fname.translate(table)
def move_to_parent(fp):
    """Dissolve directory *fp* by moving its entries up one level.

    The directory is first renamed to a random 32-character name inside its
    parent so that a child carrying the same name as *fp* itself can be moved
    up without colliding with it. Every entry is then moved into the parent
    and the (now empty) temporary directory is removed.

    Args:
        fp: Path of the directory to dissolve.

    Raises:
        OSError: If the rename or the final ``rmdir`` fails.
        shutil.Error: If the parent already contains an entry with the same
            name as one of the children being moved.
    """
    fp = os.path.abspath(fp)
    parent = os.path.dirname(fp)
    temp_name = os.path.join(
        parent,
        ''.join(random.choices(string.ascii_uppercase + string.digits, k=32)),
    )
    os.rename(fp, temp_name)
    # List the renamed directory: the original path no longer exists here.
    for entry in os.listdir(temp_name):
        shutil.move(os.path.join(temp_name, entry), parent)
    os.rmdir(temp_name)
# Rename every work folder in the current working directory to
# "[maker][RJ id][voice actors] title", using metadata scraped from the
# work's DLsite product page, and collapse needless single-folder nesting
# inside it first.
for fp in os.listdir():
    if not os.path.isdir(fp):
        continue

    # The work id is either a leading "RJ123456" or a bracketed "[RJ123456]".
    rjid = re.match(r'^(RJ[0-9]+)', fp)
    if rjid is None:
        rjid = re.search(r'\[(RJ[0-9]+)\]', fp)
    if rjid is None:
        continue
    rjid = rjid[1]

    try:
        r = requests.get(
            f'https://www.dlsite.com/maniax/work/=/product_id/{rjid}.html',
            proxies={
                'https': 'socks5://localhost:10808'
            },
            headers={
                'Accept': 'text/html',
                'Accept-Language': 'ja',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
                'DNT': '1',
            },
            # Without a timeout a stalled proxy would hang the whole batch.
            timeout=30,
        )
    except requests.RequestException as exc:
        # One unreachable page should not abort the remaining folders.
        print(f'{fp}: request for {rjid} failed ({exc}); skipped')
        continue
    if not r.ok:
        continue

    doc = BeautifulSoup(r.text, 'lxml')
    maker_node = doc.select_one('#work_maker span.maker_name')
    title_node = doc.select_one('#work_name > a')
    if maker_node is None or title_node is None:
        # The page does not have the expected layout; leave the folder alone.
        print(f'{fp}: maker/title not found on the page for {rjid}; skipped')
        continue
    maker = maker_node.text.strip()
    title = title_node.text.strip()

    # Collect the names linked in the outline row whose header is '声優'
    # (voice actors); if several rows match, the last one wins.
    cv = []
    for row in doc.select('#work_outline > tr'):
        header = row.select_one('th')
        if header is None or header.text.strip() != '声優':
            continue
        cv = [a.text.strip() for a in row.select('td > a')]
    cv = '/'.join(cv[:5])  # keep at most five names in the folder name
    if cv:
        fname = safe_filename(f'[{maker}][{rjid}][{cv}] {title}')
    else:
        fname = safe_filename(f'[{maker}][{rjid}] {title}')

    # While the folder holds exactly one entry and that entry is a directory,
    # hoist the directory's contents up one level.
    while True:
        sub_entries = os.listdir(fp)
        if len(sub_entries) != 1:  # also stops on an empty folder
            break
        sub = os.path.join(fp, sub_entries[0])
        if not os.path.isdir(sub):
            break
        move_to_parent(sub)

    if fp != fname:
        os.rename(fp, fname)
|