python 爬取某个听书网站

python 爬取某个听书网站

 
bookcode.txt
txt文件
13.2K

 

# 小说信息接口 http://www.6yueting.com/web/index/xsDetail?code=802f1082
# 小说章节接口 http://www.6yueting.com/web/index/xsListdetail?code=802f1082&pageSize=30&pageNum=1
# 小说章节信息 http://www.6yueting.com/web/index/video_new?code=802f1082&no=2&type=0&timestamp=1745428278390&sign=e5a2feeaea665536c9145542eab7a26a
# 参数说明:code 书籍ID;no 章节ID;type 未知;timestamp 毫秒时间戳;sign 签名
# 签名生成方法 
# sign: hex_md5((new Date).getTime() + T + b + U)
# T:就是code 也就是书籍ID 
# b:就是no 章节ID
# U:含义不明,生成方式为 U = lalalala + "FSKVKSKFKS"
# lalalala是一个变量 生成方式为 var lalalala = $('.audio-iframe').data('info').substring(0,5);
# <div class="audio-iframe" data-info="abcdef1234"></div>
# lalalala 就等于 abcde
# 听书封面 http://img.6yueting.com:20001/ 后接可变路径(例如 20210310/cedb2c647e.jpg)

import requests
import json
import time
import hashlib

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}

def get_audio_info(code, timeout=10):
    """Fetch the raw book-detail JSON for one book.

    Args:
        code: Book identifier used as the ``code`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded JSON body, or ``None`` if the request failed, returned an
        HTTP error status, or the body was not valid JSON.
    """
    url = f"http://www.6yueting.com/web/index/xsDetail?code={code}"
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    # ValueError covers JSON decoding failures on requests versions whose
    # decode error is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Failed to fetch audio info for code {code}: {e}")
        return None

def get_audio_chapter(code, page_num=1, page_size=30, timeout=10):
    """Fetch one page of the raw chapter-list JSON for a book.

    Args:
        code: Book identifier used as the ``code`` query parameter.
        page_num: Page number sent as ``pageNum``.
        page_size: Number of chapters per page sent as ``pageSize``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded JSON body, or ``None`` if the request failed, returned an
        HTTP error status, or the body was not valid JSON.
    """
    url = (
        "http://www.6yueting.com/web/index/xsListdetail"
        f"?code={code}&pageSize={page_size}&pageNum={page_num}"
    )
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    # ValueError covers JSON decoding failures on requests versions whose
    # decode error is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Failed to fetch audio chapters for code {code}: {e}")
        return None

def generate_sign(code, no, data_info="abcde"):
    """Build the ``timestamp``/``sign`` pair for a chapter request.

    The signature is the hex MD5 digest of the concatenation
    ``timestamp + code + no + U`` where ``U`` is the first five characters of
    the page's ``data-info`` value followed by ``"FSKVKSKFKS"``.

    Args:
        code: Book identifier.
        no: Chapter number.
        data_info: Value of the ``data-info`` attribute of the page's
            ``.audio-iframe`` element (or just its first five characters).
            Only the first five characters are used. Defaults to the
            placeholder ``"abcde"``.

    Returns:
        A ``(timestamp, sign)`` tuple: the current time in milliseconds as an
        int, and the 32-character hex digest.
    """
    timestamp = int(time.time() * 1000)
    # Mirrors the site's JavaScript: data('info').substring(0, 5).
    prefix = str(data_info)[:5]
    suffix = prefix + "FSKVKSKFKS"
    sign_str = f"{timestamp}{code}{no}{suffix}"
    sign = hashlib.md5(sign_str.encode("utf-8")).hexdigest()
    return timestamp, sign

def get_audio_chapter_url(code, no, timeout=10, **sign_kwargs):
    """Fetch the raw JSON describing one chapter's audio.

    Args:
        code: Book identifier used as the ``code`` query parameter.
        no: Chapter number used as the ``no`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.
        **sign_kwargs: Extra keyword arguments forwarded unchanged to
            ``generate_sign`` (none are required).

    Returns:
        The decoded JSON body, or ``None`` if the request failed, returned an
        HTTP error status, or the body was not valid JSON.
    """
    # The timestamp sent must be the same one that was hashed into the sign.
    timestamp, sign = generate_sign(code, no, **sign_kwargs)
    url = (
        "http://www.6yueting.com/web/index/video_new"
        f"?code={code}&no={no}&type=0&timestamp={timestamp}&sign={sign}"
    )
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    # ValueError covers JSON decoding failures on requests versions whose
    # decode error is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Failed to fetch audio chapter URL for code {code} and no {no}: {e}")
        return None

def adapt_audio_info(data):
    """Map a book-detail API response onto the local audio record layout.

    Args:
        data: Decoded JSON response, or ``None`` when the fetch failed.

    Returns:
        A dict with the adapted fields, or ``None`` when ``data`` is empty,
        is not a dict, does not carry ``code == 200``, or has no dict under
        ``'data'``. Missing fields inside the payload default to ``''``.
    """
    # .get() instead of indexing: an error response may lack these keys.
    if not isinstance(data, dict) or data.get('code') != 200:
        return None
    audio_data = data.get('data')
    if not isinstance(audio_data, dict):
        return None
    return {
        'cid': audio_data.get('type', ''),
        'year_id': '1',
        'area_id': '1',
        'lang_type': '1',
        'code': audio_data.get('code', ''),
        'audio_name': audio_data.get('name', ''),
        'audio_cover': audio_data.get('coverUrlLocal', ''),
        'audio_author': audio_data.get('author', ''),
        'audio_desc': audio_data.get('descXx', ''),
        'is_show': 1,
    }

def adapt_audio_chapter(data):
    """Reduce a chapter-list API response to id/title/no records.

    Args:
        data: Decoded JSON response, or ``None`` when the fetch failed.

    Returns:
        A list of ``{'id', 'title', 'no'}`` dicts (empty when the response
        carries no chapters), or ``None`` when ``data`` is empty, is not a
        dict, does not carry ``code == 200``, or has no dict under ``'data'``.
    """
    # .get() instead of indexing: an error response may lack these keys.
    if not isinstance(data, dict) or data.get('code') != 200:
        return None
    payload = data.get('data')
    if not isinstance(payload, dict):
        return None
    # A missing or null 'list' is treated as "no chapters".
    items = payload.get('list') or []
    return [
        {
            'id': item.get('id'),
            'title': item.get('title'),
            'no': item.get('no'),
        }
        for item in items
    ]

def adapt_audio_chapter_url(data):
    """Extract the audio URL from a chapter API response.

    Args:
        data: Decoded JSON response, or ``None`` when the fetch failed.

    Returns:
        ``{'url': <videoUrl or None>}``, or ``None`` when ``data`` is empty,
        is not a dict, does not carry ``code == 200``, or has no dict under
        ``'data'``.
    """
    # .get() instead of indexing: an error response may lack these keys.
    if not isinstance(data, dict) or data.get('code') != 200:
        return None
    payload = data.get('data')
    if not isinstance(payload, dict):
        return None
    return {
        'url': payload.get('videoUrl'),
    }

# Load the book codes to process: one code per line, blank lines ignored.
try:
    with open('bookcode.txt', 'r') as file:
        book_codes = [entry for entry in map(str.strip, file) if entry]
except FileNotFoundError:
    print("The file 'bookcode.txt' was not found.")
    book_codes = []

for code in book_codes:
    print(f"Processing code: {code}")

    # Book details.
    audio_info = get_audio_info(code)
    adapted_audio_info = adapt_audio_info(audio_info)
    if not adapted_audio_info:
        print(f"No valid audio info found for code {code}")
    else:
        print("Adapted Audio Info:")
        print(json.dumps(adapted_audio_info, indent=4, ensure_ascii=False))

    # Chapter list (first page only, using the fetcher's defaults).
    audio_chapter = get_audio_chapter(code)
    adapted_chapters = adapt_audio_chapter(audio_chapter)
    if not adapted_chapters:
        print(f"No valid audio chapters found for code {code}")
    else:
        print("Adapted Audio Chapters:")
        print(json.dumps(adapted_chapters, indent=4, ensure_ascii=False))

        # Audio URL of the first listed chapter only.
        first_chapter_no = adapted_chapters[0]['no']
        audio_chapter_url = get_audio_chapter_url(code, first_chapter_no)
        adapted_url = adapt_audio_chapter_url(audio_chapter_url)
        if not adapted_url:
            print(f"No valid audio chapter URL found for code {code} and no {first_chapter_no}")
        else:
            print("Adapted Audio Chapter URL:")
            print(json.dumps(adapted_url, indent=4, ensure_ascii=False))

    print('------------------------')
© 版权声明
THE END
喜欢就支持一下吧
点赞15 分享
评论 抢沙发

请登录后发表评论

    暂无评论内容