Python - Extract YouTube meta-info with regex
I want to extract YouTube video meta-info using Python regular expressions. Here is what I have found so far to extract the video URL and the video title:
import re

try:
    from urllib2 import urlopen  # Python 2, as used by the original script
except ImportError:
    from urllib.request import urlopen  # Python 3 fallback

# https://www.youtube.com/user/<channel_name>/videos
# Here the channel name is schooloflifechannel.
site = "https://www.youtube.com/user/schooloflifechannel/videos"

# Start of the class attribute carried by each video link of the channel.
video_link_class = 'class="yt-uix-sessionlink yt-uix-tile-link'

# re.MULTILINE only changes how ^ and $ match; none of these patterns use
# them, so the flag is kept purely for fidelity with the original script.
# Note that "." does not match a newline, so an anchor tag that spans
# several lines is not matched.
anchor_re = re.compile(r'<a.*?/a>', re.MULTILINE)
# Capture groups return the attribute value directly, instead of slicing
# the repr of a result list (which breaks on titles containing quotes).
title_re = re.compile(r'title="(.*?)"', re.MULTILINE)
href_re = re.compile(r'href="(.*?)"', re.MULTILINE)


def extract_videos(html):
    """Return a list of (video_url, video_title) tuples found in *html*.

    Every ``<a ...>...</a>`` tag containing ``video_link_class`` yields one
    tuple. The URL is "https://www.youtube.com" followed by the value of
    the tag's first ``href`` attribute; the title is the value of its first
    ``title`` attribute. A missing attribute contributes an empty string,
    so the number of tuples equals the number of matching anchor tags.
    """
    found = []
    for anchor in anchor_re.findall(html):
        if video_link_class not in anchor:
            continue
        title_match = title_re.search(anchor)
        href_match = href_re.search(anchor)
        title = title_match.group(1) if title_match else ""
        link = href_match.group(1) if href_match else ""
        found.append(("https://www.youtube.com" + link, title))
    return found


videos = []

if __name__ == "__main__":
    response = urlopen(site)
    try:
        page = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    # Python 3 returns bytes from read(); the str patterns above need text.
    if not isinstance(page, str):
        page = page.decode("utf-8", "replace")
    videos = extract_videos(page)
    # Print the (link, title) pair of each video found on the channel page.
    print(videos)
Now I want to extract the meta-info from class="yt-lockup-meta-info" and join all of the results in the format ("video_url", "video_title", "video_upload_date"). How can I do this with Python's re module?
Comments
Post a Comment