"""Quick and dirty VKontakte music downloader.

Written for Python 2 (it uses raw_input and the Python 2 urllib API).  The
script queries the VK "audio.search" API and either lists the matches for a
single query so that one of them can be downloaded to the desktop, or
resolves every query found in a batch file (one query per line).
"""
import copy
import hashlib
import os
import re
import time
import unicodedata
import urllib
import xml.dom.minidom

try:
    import md5  # legacy import kept from the original file; hashlib does the hashing
except ImportError:
    md5 = None

# Credentials and method used for every API request.
APP_ID = "1850196"
SECRET_KEY = "nk0n6I6vjQ"
USER_ID = "76347967"
API_METHOD = "audio.search"

SEPARATOR = "-----------------------------------------------------------"


def clearscreen(numlines=100):
    """Print a separator line, then clear the console.

    Uses the platform's clear command where one is known; otherwise prints
    `numlines` blank lines to push the old output out of view.
    """
    print(SEPARATOR)
    if os.name == "posix":
        # Unix/Linux/MacOS/BSD/etc
        os.system('clear')
    elif os.name in ("nt", "dos", "ce"):
        # DOS/Windows
        os.system('CLS')
    else:
        # Fallback for other operating systems.
        print('\n' * numlines)


def to_ascii(string_in):
    """Return `string_in` NFKD-normalized and encoded as ASCII.

    Characters that have no ASCII form are replaced with '?'.
    """
    return unicodedata.normalize('NFKD', string_in).encode('ascii', 'replace')


def to_ascii_ignore(string_in):
    """Return `string_in` NFKD-normalized and encoded as ASCII.

    Characters that have no ASCII form are dropped.
    """
    return unicodedata.normalize('NFKD', string_in).encode('ascii', 'ignore')


def GetHMS(seconds):
    """Format a duration in seconds as "MM:SS", or "HH:MM:SS" from one hour up."""
    # divmod keeps the arithmetic integral regardless of how "/" behaves.
    hours, remainder = divmod(seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    if hours == 0:
        return "%02d:%02d" % (minutes, seconds)
    return "%02d:%02d:%02d" % (hours, minutes, seconds)


def desktopfolder():
    """Return the current user's Desktop directory, with a trailing separator.

    The home directory is always expanded: a literal "~/Desktop/" is not
    understood by open()/urlretrieve, which made downloads fail on Unix.
    """
    # Joining with an empty last component appends the platform's separator.
    return os.path.join(os.path.expanduser('~'), 'Desktop', '')


def make_sig(self, method, query, count):
    """Return the md5 hex digest used as the `sig` parameter of a request.

    The digest is taken over USER_ID, the request parameters written as
    name=value with no delimiters, and SECRET_KEY.  `self` is unused and is
    kept only so that existing calls keep working.  `count` must be a string.
    """
    strg = (USER_ID + "api_id=" + APP_ID + "count=" + count
            + "method=" + method + "q=" + query
            + "test_mode=1v=2.0" + SECRET_KEY)
    return hashlib.md5(strg).hexdigest()


def getText(nodelist):
    """Concatenate the data of every text node in `nodelist`."""
    return ''.join(node.data for node in nodelist
                   if node.nodeType == node.TEXT_NODE)


def handlexml(xmlfile):
    """Return the parsed rows for every <audio> element of the DOM `xmlfile`.

    See handleToc for the layout of a row.
    """
    return handleToc(xmlfile.getElementsByTagName("audio"))


def handlexmlcount(count):
    """Print the number of results held by the <count> element `count`."""
    print("Results:%s" % getText(count.childNodes))
    print(SEPARATOR)


def _child_text(element, tag):
    """Return the text of the first `tag` child found under `element`."""
    return getText(element.getElementsByTagName(tag)[0].childNodes)


def handleToc(audios):
    """Turn <audio> elements into rows of [place, artist, title, duration, url].

    `place` is the 1-based position of the entry and `duration` is already
    formatted by GetHMS.
    """
    urllist = []
    for place, audio in enumerate(audios, 1):
        urllist.append([
            place,
            _child_text(audio, "artist"),
            _child_text(audio, "title"),
            GetHMS(int(_child_text(audio, "duration"))),
            _child_text(audio, "url"),
        ])
    return urllist


def printrows(count, lists):
    """Print up to `count` rows of `lists` as "#place) artist - title - duration".

    Fewer rows are printed when `lists` is shorter than `count` (the API may
    return fewer results than requested).  `lists` is not modified.
    """
    for row in lists[:int(count)]:
        place, artist, title, duration = row[:4]
        print("#%s) %s - %s - %s"
              % (place, to_ascii(artist), to_ascii(title), duration))


def apigrab(count, query):
    """Run an audio search and return the response as a minidom document.

    `count` is the number of results to request, as a string; `query` is the
    search text.
    """
    sig = make_sig(None, API_METHOD, query, count)
    # The signature is computed over the raw query, but the URL itself needs
    # the query escaped so characters such as "&" or "#" cannot break it.
    url = ("http://api.vk.com/api.php?api_id=" + APP_ID
           + "&count=" + count
           + "&v=2.0&method=" + API_METHOD
           + "&sig=" + sig
           + "&test_mode=1&q=" + urllib.quote(query))
    response = urllib.urlopen(url)
    try:
        return xml.dom.minidom.parse(response)
    finally:
        response.close()


def infoprinter(dom, count):
    """Print the result count followed by up to `count` result rows of `dom`."""
    handlexmlcount(dom.getElementsByTagName("count")[0])
    printrows(count, handlexml(dom))
    print(SEPARATOR)
    print("")


def _safe_filename(name):
    """Replace characters that are not allowed in file names with "_"."""
    return re.sub(r'[\\/:*?"<>|]', '_', name)


def downloadpicked(resultinfo):
    """Download one result row to the desktop as "<artist> - <title>.mp3".

    `resultinfo` is a row as produced by handleToc.
    """
    artist, title, url = resultinfo[1], resultinfo[2], resultinfo[4]
    # Artist and title come straight from the API and may contain path
    # separators (e.g. "AC/DC"), which would otherwise point at a directory
    # that does not exist.
    filename = (desktopfolder() + _safe_filename(artist)
                + " - " + _safe_filename(title) + ".mp3")
    urllib.urlretrieve(url, filename)


def striplist(l):
    """Return a list holding every string of `l` stripped of outer whitespace."""
    return [x.strip() for x in l]


def queryparser(batchlocation):
    """Read the batch file at `batchlocation` and return its cleaned queries.

    Blank lines are skipped and every run of non-word characters is collapsed
    into a single space.
    """
    with open(batchlocation, 'r') as batchfile:
        lines = striplist(batchfile.read().splitlines())
    # NOTE(review): on Python 2 byte strings \W also matches non-ASCII bytes,
    # so non-Latin queries are blanked out here -- confirm whether intended.
    return [re.sub(r'\W+', ' ', line) for line in lines if line]


def massdlengine(dllist):
    """Look up the first search hit for every query in `dllist`.

    Returns a list of [artist, title, url, counter] entries, where `counter`
    is the 1-based position of the query in `dllist`.  Queries without any
    hit are reported and skipped instead of aborting the whole batch.
    """
    dlinfolist = []
    for counter, query in enumerate(dllist, 1):
        results = handlexml(apigrab("1", query))
        if results:
            place, artist, title, duration, url = results[0]
            info = [artist, title, url, counter]
            dlinfolist.append(info)
            print(info)
        else:
            print("No results for: %s" % query)
        # Pause between requests so the API is not hammered.
        time.sleep(.5)
    return dlinfolist


def main():
    """Run the interactive downloader."""
    print("quick&dirty VKontakte music downloader")
    print(SEPARATOR)

    # The answer used to be overwritten with "y" right after the prompt,
    # which made the single-query mode unreachable.
    batch = raw_input("""Do you want to use batch mode? Type "y" to do so: """)
    clearscreen()

    if batch.strip().lower() == "y":
        batchlocation = raw_input("""Where is your file?: """)
        # TODO: batch mode only resolves and prints the first hit of every
        # query; nothing is downloaded yet.
        massdlengine(queryparser(batchlocation))
    else:
        query = raw_input("What is your singular query? Note: Chinese characters don't work yet: ")
        count = str(int(raw_input("How many results do you want? ")))
        clearscreen()

        dom = apigrab(count, query)
        infoprinter(dom, count)

        which = int(raw_input("Which song # do you want (ignore question marks, that's russian characters)(scroll up)(type in a number)(yes the selection is bad, and looks like limewire... w/e): ")) - 1
        clearscreen()

        results = handlexml(dom)
        # Guard the index: 0 or a negative number would otherwise silently
        # pick a song counted from the end of the list.
        if 0 <= which < len(results):
            print("Downloading...")
            downloadpicked(results[which])
        else:
            print("No song with that number.")

    raw_input("Done, press enter to exit")


if __name__ == "__main__":
    main()