您好,登錄后才能下訂單哦!
本文實例為大家分享了python批量爬取下載抖音視頻的具體代碼,供大家參考,具體內容如下
"""Batch-download every video posted by one Douyin user.

Flow: resolve the user's share link (``get_info``), page through the
post-list API collecting play URLs (``get_all_video``), then fetch all of
them concurrently with aiohttp (``download_video``).
"""
import asyncio
import os
import re
import sys

import aiohttp
import requests

# Mobile Safari user agent sent with every HTTP request made by this script.
headers = {
    'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) '
                  'Version/11.0 Mobile/15A372 Safari/604.1'
}

# VIDEO_URLS accumulates [description, play_url] pairs across pages;
# PAGE is the 1-based counter used only for progress output.
VIDEO_URLS, PAGE = [], 1

# Seconds before a blocking `requests` call gives up instead of hanging forever.
REQUEST_TIMEOUT = 10

# Bytes read from the response stream per iteration while downloading.
CHUNK_SIZE = 64 * 1024


def get_info(url):
    """Resolve a user's share link into the values the post-list API needs.

    :param url: share link of the user
    :return: tuple ``(name, user_id, dytk)``; any element that could not be
        extracted is ``None``
    """
    name = None
    dytk = None
    user_id = None
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        # The user id is taken from the sixth '/'-separated piece of the
        # final (post-redirect) URL, with any query string removed.
        user_id = response.url.split('/')[5].split('?')[0]
        # re.search returns None when the page does not match; subscripting
        # None raises TypeError, which is handled below.
        name = re.search(r'class="nickname">(.*?)<', response.text)[1]
        dytk = re.search(r"dytk: '(.*?)'", response.text)[1]
    except (TypeError, IndexError):
        sys.stdout.write('Waring:輸入的鏈接錯誤')
    except requests.exceptions.RequestException:
        # `requests.exceptions` itself is a module and cannot be caught;
        # RequestException is the base class of the library's errors.
        sys.stdout.write('Waring:鏈接錯誤')
    return name, user_id, dytk


def make_dir(name):
    """Create the output directory for a user if it does not exist yet.

    :param name: user name, used as the directory name
    :return: None
    """
    os.makedirs(name, exist_ok=True)


def get_all_video(user_id, max_cursor, dytk):
    """Collect the play URLs of a user's videos into ``VIDEO_URLS``.

    Pages are requested one after another, starting at ``max_cursor``, until
    the API reports that there are no more results.

    :param user_id: user id as returned by ``get_info``
    :param max_cursor: paging cursor to start from (0 for the first page)
    :param dytk: token as returned by ``get_info``
    :return: ``VIDEO_URLS`` when collection finished, ``None`` on a non-200
        response or on any request/parsing error
    """
    global PAGE
    url = "https://www.amemv.com/aweme/v1/aweme/post/?"
    try:
        # Iterative paging: one recursive call per page could exhaust the
        # interpreter's recursion limit for accounts with many pages.
        while True:
            params = {'user_id': user_id, 'count': 21,
                      'max_cursor': max_cursor, 'dytk': dytk}
            response = requests.get(url=url, params=params, headers=headers,
                                    timeout=REQUEST_TIMEOUT)
            if response.status_code != 200:
                print('狀態碼:', response.status_code)
                return None

            datas = response.json()
            for data in datas['aweme_list']:
                name = data.get('share_info').get('share_desc')
                # Swap the 'playwm' path segment for 'play' in the first
                # listed address (presumably the non-watermarked variant —
                # TODO confirm against the API).
                video_url = (data.get('video').get('play_addr')
                             .get('url_list')[0].replace('playwm', 'play'))
                VIDEO_URLS.append([name, video_url])

            next_cursor = datas.get('max_cursor')
            if datas['has_more'] == 1 and next_cursor != 0:
                print(f'收集第{PAGE}頁視頻')
                PAGE += 1
                if next_cursor == max_cursor:
                    # The cursor did not advance; requesting again would
                    # return the same page forever.
                    print('收集完成')
                    return VIDEO_URLS
                max_cursor = next_cursor
            else:
                print('收集完成')
                return VIDEO_URLS
    except (requests.exceptions.RequestException, ValueError, KeyError,
            AttributeError, TypeError, IndexError) as e:
        # Network failure, invalid JSON, or an unexpected response shape.
        print('Waring:', e)
        return None


def _safe_filename(text, fallback):
    """Return ``text`` with path-hostile characters replaced by '_'.

    Falls back to ``fallback`` when nothing usable remains.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', text or '').strip(' .')
    return cleaned or fallback


async def download_video(index, name, video_name, url):
    """Download one video into ``<name>/<video_name>.mp4``.

    The data is streamed to a temporary ``.part`` file and renamed only after
    the transfer completed, so an interrupted download is never mistaken for
    a finished one on the next run.

    :param index: 1-based position of the video, used in progress output
    :param name: user name (the target directory)
    :param video_name: video description, used as the file name
    :param url: download URL
    :return: None
    """
    print(f'正在下載第{index}個視頻:{video_name}')
    file_name = _safe_filename(video_name, f'video_{index}')
    video_path = '{}/{}.mp4'.format(name, file_name)
    if os.path.isfile(video_path):
        print('文件已存在')
        return

    # The index keeps temp files distinct when two videos share a description.
    temp_path = f'{video_path}.{index}.part'
    try:
        async with aiohttp.ClientSession() as session:
            # NOTE(review): ssl=False disables certificate verification;
            # kept from the original, confirm it is still required.
            async with session.get(url=url, headers=headers, ssl=False) as response:
                # Do not save an HTTP error page as an .mp4 file.
                response.raise_for_status()
                with open(temp_path, 'wb') as f:
                    while True:
                        chunk = await response.content.read(CHUNK_SIZE)
                        if not chunk:
                            break
                        f.write(chunk)
        os.replace(temp_path, video_path)
        print(f'下載完成第{index}個視頻:{video_name}')
    except Exception as e:
        # Best effort: one failed video must not abort the whole batch.
        print('waring:download faild', video_name, e)
        try:
            os.remove(temp_path)
        except OSError:
            pass
        return


async def _download_all(name, videos):
    """Run ``download_video`` concurrently for every ``[description, url]`` pair."""
    await asyncio.gather(*(
        download_video(index, name, video_name, url)
        for index, (video_name, url) in enumerate(videos, 1)
    ))


def main():
    """Download all videos of the user behind the hard-coded share link."""
    url = 'http://v.douyin.com/dEorkn/'
    name, user_id, dytk = get_info(url)
    # A tuple of three items is always truthy, so each value is tested on
    # its own: stop when the profile could not be resolved.
    if not name or not user_id or dytk is None:
        return
    make_dir(name)
    get_all_video(user_id, 0, dytk)
    print(f'{name}:總共有{len(VIDEO_URLS)}個視頻')
    if VIDEO_URLS:
        # A dedicated loop avoids relying on an implicit current event loop.
        loop = asyncio.new_event_loop()
        try:
            loop.run_until_complete(_download_all(name, VIDEO_URLS))
            # One extra loop iteration so closed connections can finish
            # their cleanup before the loop is closed.
            loop.run_until_complete(asyncio.sleep(0))
        finally:
            loop.close()
    print(f'{name}視頻下載完成!')


if __name__ == '__main__':
    main()
以上就是本文的全部內容,希望對大家的學習有所幫助,也希望大家多多支持億速云。
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯繫站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。