请转到www.cnblogs.com/iMath/

本博客打算放弃，请转到 www.cnblogs.com/iMath/

YouTube 视频解析资料

http://stackoverflow.com/questions/23424908/most-of-videos-not-getting-videourl-from-youtube


http://stackoverflow.com/questions/16555963/how-do-i-get-video-info-for-youtube-vevo-videos


https://github.com/johnny0614/YoutubeVideoDownload/blob/master/YoutubeVideoDownload/YoutubeVideoDownload.py


https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/youtube.py


http://pydoc.net/Python/Pytomo/3.0.4/pytomo.lib_youtube_download/


http://kej.tw/flvretriever

 

 

from urllib.parse import urlparse, parse_qs, unquote
import requests

# Proxy mapping (scheme -> proxy URL); both schemes use the same local
# endpoint. The only request in this script has `proxies=` commented out.
proxies = dict.fromkeys(("http", "https"), "http://127.0.0.1:8580")

# Headers sent with the video-info request: a desktop Chrome user agent.
headers = {
    # "referer": queryUrl,
    "user-agent": (
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36"
    ),
}

# token_value = url_info['token'][0]
# print(token_value)


# download_url = "https://www.youtube.com/get_video?video_id={0}&t={1}&fmt=18".format(
# video_id, token_value)


# video_title = url_info['title'][0] if 'title' in url_info else ''
# Unicode filenames are more trouble than they're worth
# filename = video_title.encode('ascii', 'ignore').decode(
# 'ascii').replace("/", "-") + '.mp4'

# print("\t Downloading '{}' to '{}'...".format(video_title, filename))

# #
# r = requests.get(download_url, stream=True,
# headers=headers)#proxies=proxies,

# print(r.status_code, r.url, int(r.headers['Content-Length']),
# type(r.headers['Content-Length'])) # ,r.history,r.status_code ,


def get_video_info(video_url='https://www.youtube.com/watch?v=NpCST_bYzWo'):
    """Fetch and parse YouTube's ``get_video_info`` response for a video.

    The ``v`` query parameter of *video_url* is used as the video id. The
    info endpoint is queried with several ``el`` variants in turn, stopping
    at the first response that contains a ``token`` field.

    Args:
        video_url: A watch URL carrying the video id in its ``v`` parameter.

    Returns:
        The response parsed by ``parse_qs`` (a dict mapping each field name
        to a list of string values), or ``None`` when no variant produced a
        ``token``. In the latter case an error message is printed.

    Raises:
        KeyError: If *video_url* has no ``v`` query parameter.
    """
    video_id = parse_qs(urlparse(video_url).query)['v'][0]

    video_info = {}
    for el_type in ('&el=embedded', '&el=detailpage', '&el=vevo', ''):
        video_info_url = (
            'http://www.youtube.com/get_video_info?&video_id=%s%s'
            '&ps=default&eurl=&gl=US&hl=en' % (video_id, el_type))

        r = requests.get(video_info_url, headers=headers)  # proxies=proxies,

        # parse_qs percent-decodes names and values itself. Unquoting the
        # body first would decode twice and expose the encoded '&' / '='
        # inside nested fields (e.g. url_encoded_fmt_stream_map), splitting
        # them into bogus top-level keys.
        video_info = parse_qs(r.text)

        if 'token' in video_info:
            return video_info

    # No variant returned a token: report the last response's reason, if any.
    if 'reason' in video_info:
        # Values are already str on Python 3; there is nothing to decode.
        print('ERROR: YouTube said: %s' % video_info['reason'][0])
    else:
        print('ERROR: "token" parameter not in '
              'video info for unknown reason')
    return None


def video_file_urls(video_info):
    """Extract the video file URLs from a parsed video-info mapping.

    Args:
        video_info: Mapping of field name to list of values, as produced by
            ``parse_qs``. Its ``url_encoded_fmt_stream_map`` field is read as
            a comma-separated list of query-string encoded stream entries.
            ``None`` or a mapping without that field is accepted.

    Returns:
        A list with one dict per stream entry that has a ``url`` field:
        ``{'url': <str>, 'type': <str or None>}``. The list is empty when
        *video_info* is ``None``/empty or carries no stream map.
    """
    # get_video_info() returns None on failure; do not crash on that.
    if not video_info:
        return []

    stream_maps = video_info.get('url_encoded_fmt_stream_map')
    if not stream_maps:
        return []

    url_maps = []
    for raw_entry in stream_maps[0].split(','):
        entry = parse_qs(raw_entry)
        # Skip fragments that do not describe a stream (no URL to offer).
        if 'url' not in entry:
            continue
        url_maps.append({
            'url': entry['url'][0],
            'type': entry.get('type', [None])[0],
        })
    return url_maps


# print(get_video_info())

# Script entry: fetch the info for the default video, then print whatever
# video_file_urls() returns for it.
_video_info = get_video_info()
print(video_file_urls(_video_info))

 


评论

© 请转到www.cnblogs.com/iMath/ | Powered by LOFTER