python - Extract youtube meta-info with regex -


I want to extract YouTube video meta-info using Python regular expressions. Here is what I have so far; it extracts the video URL and the video title:

# Scrape a YouTube channel's "videos" page and print a list of
# (video_url, video_title) tuples, using only regular expressions.
import contextlib
import re

try:
    import urllib2  # Python 2, as used by the original snippet
except ImportError:
    # Python 3: urlopen lives in urllib.request.
    import urllib.request as urllib2

# https://www.youtube.com/user/<channel_name>/videos
# Here the channel name is "schooloflifechannel".
site = "https://www.youtube.com/user/schooloflifechannel/videos"

# Filter that finds anchor tags. The flag is spelled re.MULTILINE
# (re.multiline does not exist); it only changes how ^ and $ match, so
# an anchor must still sit on a single line to be found.
ANCHOR_RE = re.compile(r'<a.*?/a>', re.MULTILINE)

# Capture groups return the attribute value directly, instead of slicing
# the repr of a findall() result list.
TITLE_RE = re.compile(r'title="(.*?)"', re.MULTILINE)
HREF_RE = re.compile(r'href="(.*?)"', re.MULTILINE)

# Start of the class attribute carried by each video link of the channel.
VIDEO_LINK_CLASS = 'class="yt-uix-sessionlink yt-uix-tile-link'

URL_PREFIX = "https://www.youtube.com"


def extract_videos(html):
    """Return a list of (video_url, video_title) tuples found in *html*.

    Only anchor tags containing VIDEO_LINK_CLASS are considered. The
    title and the href are read from the same anchor, so each URL is
    always paired with its own title. Anchors that lack either attribute
    are skipped. The href is prefixed with URL_PREFIX.
    """
    found = []
    for anchor in ANCHOR_RE.findall(html):
        if VIDEO_LINK_CLASS not in anchor:
            continue
        title_match = TITLE_RE.search(anchor)
        href_match = HREF_RE.search(anchor)
        if title_match is None or href_match is None:
            continue
        found.append((URL_PREFIX + href_match.group(1), title_match.group(1)))
    return found


def main():
    """Download the channel page at `site` and print the extracted videos."""
    # closing() guarantees the HTTP response is released even if read() fails.
    with contextlib.closing(urllib2.urlopen(site)) as response:
        html = response.read()
    # On Python 3 read() returns bytes; the patterns above are text patterns.
    if not isinstance(html, str):
        html = html.decode("utf-8", "replace")
    videos = extract_videos(html)
    print(videos)


if __name__ == "__main__":
    main()

Now I want to extract the meta-info from the elements with class="yt-lockup-meta-info" and join all of the results in the format ("video_url", "video_title", "video_upload_date"). How can I do this with Python's re module?


Comments

Popular posts from this blog

Is there a better way to structure post methods in Class Based Views -

performance - Why is XCHG reg, reg a 3 micro-op instruction on modern Intel architectures? -

jquery - Responsive Navbar with Sub Navbar -