associação pythonbrasil[11] django zope/plone planet Início Logado como (Entrar)

Você não tem permissão para executar esta ação.

Excluir mensagem

DividirArquivosEmVariosCdOuDvd

MarceloBarros

Este script permite que uma série de arquivos seja organizada em vários DVDs/CDs de forma otimizada, isto é, tentando sempre utilizar ao máximo o espaço disponível na mídia. É útil quando você não quer pensar muito na melhor organização, e eu venho usando-o para organizar os meus arquivos de backup. Ao final, ele lista uma opção de organização. Como o algoritmo é estatístico, você pode rodar novamente para ter um resultado diferente, apesar de construído com os mesmos critérios.

Este programa usa uma rede neural do tipo Hopfield para a solução do problema. Basicamente é necessário especificar o diretório onde os arquivos estão, o tamanho da mídia e a folga máxima. Se nenhum parâmetro for especificado, são usados o diretório corrente, uma mídia de 4500MB (DVD) e uma folga de 50MB. A folga diz o quanto o algoritmo pode encarar como margem de erro, já que em geral não é possível ter uma divisão sempre perfeita.

Rode com dvdsplit.py -h para informações da linha de comando. É meu primeiro programa em Python; comentários são bem-vindos.

Código

   1 """
   2 DVD Split, by Marcelo Barros (marcelobarrosalmeida(at)gmail.com)
   3 
   4 March/17/2016 - modifications for python 3, removing psyco dependence (obsolete)
   5 Sept/13/2005 - original version
   6 $Id: dvdsplit.py,v 1.1 2005/09/13 14:54:36 barros Exp $
   7 """
   8 import random
   9 import math
  10 import os
  11 import sys
  12 from optparse import OptionParser
  13 #import psyco
  14 
  15 def currentSize(files, selected):
  16     """
  17     Calculate the size of the current selection.
  18     
  19     Parameters:
  20     - files: list with file sizes
  21     - selected: list with 1 or -1, indicating if the file was selected or not
  22 
  23     Returns:
  24     - the current selection size
  25     """
  26     sz = 0
  27     for i in range(len(files)):
  28         if(selected[i] > 0):
  29             sz = sz + files[i]
  30     return sz
  31 
  32 def energyFun(files,selected,dvdsz):
  33     """
  34     Energy function used. In this case is:
  35     f(x) = |current_size - dvdsize |
  36 
  37     Parameters:
  38     - files: list with file sizes
  39     - selected: list with 1 or -1, indicating if the file was selected or not
  40     - dvdsz: dvd size
  41 
  42     Returns:
  43     - energy function value for the current selection
  44     """
  45     energ = 0
  46     for i in range(len(files)):
  47         if selected[i] > 0:
  48             energ = energ + files[i]
  49     energ = abs(energ - dvdsz)
  50     return energ
  51 
  52 def dvdInitialGuess(files,selected,dvdsz):
  53     """
  54     Just provide an initial random guess to the algorithm
  55 
  56     Parameters:
  57     - files: list with file sizes
  58     - selected: list with 1 or -1, indicating if the file was selected or not
  59     - dvdsz: dvd size    
  60     """
  61     l = len(files)
  62     idx = range(l)
  63     idx = random.sample(idx,l)
  64 
  65     t = 0
  66     for i in range(l):
  67         j = idx[i]
  68         t = t + files[j]
  69         selected[j] = 1
  70         if t >= dvdsz:
  71             break
  72     
  73 def dvdSplit(files,dvdsz,slack=50,itermax=3000,beta=0.005):
  74     """
  75     Given a list with file sizes, the dvd size and the acceptable slack
  76     this routine gives a subset that fits in one dvd.  
  77 
  78     Parameters:
  79     - files: list with file sizes
  80     - dvdsz: dvd size (single side)
  81     - slack: maximum acceptable error when calculating the selection
  82     - itermax: maximum number of iteration (default=2000)
  83     - beta: sigmoid parameter (default 0.005, empirically determined)
  84 
  85     Returns:
  86     - indexes of selected files: [ idx1, idx2, ..., idxn ]
  87     - indexes of not selected files: [ idx1, idx2, ..., idxn ]
  88     - current selection size
  89     - iteration used to calculate
  90     """
  91     running = True
  92     iter = 0
  93     numfiles = len(files)
  94     selected = [-1 for x in range(numfiles)]
  95     dvdInitialGuess(files,selected,dvdsz)
  96     
  97     sz = currentSize(files,selected) # maximum size must be greather than dvdsize
  98 
  99     if(sz > dvdsz):
 100         while(running and iter < itermax):
 101             # current energy 
 102             e1 = energyFun(files,selected,dvdsz)
 103             # change state for random node
 104             n = random.randint(0,numfiles-1)
 105             selected[n] = -selected[n]
 106             # new energy
 107             e2 = energyFun(files,selected,dvdsz)
 108             # evaluate the change against sigmoid and accept it or not
 109             # according to a uniform probability
 110             sig = (1/( 1 + math.exp(beta*(e2-e1))))
 111             if sig < random.uniform(0,1):
 112                 selected[n] = -selected[n] # state not accepted
 113             sz = currentSize(files,selected)
 114             # testing exit condition: (dvdsz - slack) < currentSize < dvdsz
 115             if ( (sz >= (dvdsz - slack)) and (sz < dvdsz) ):
 116                 running = False
 117             iter = iter + 1
 118         
 119     sel = []
 120     notsel = []
 121     for i in range(numfiles):
 122         if selected[i] > 0:
 123             sel.append(i)
 124         else:
 125             notsel.append(i)
 126     #print "Solution found in %d iterations. Compilation size is %d (%d files)" % (iter,sz,len(sel))
 127     return sel,notsel,sz,iter
 128 
 129 def dvdsSplit(files,dvdsz,slack=50,itermax=3000,beta=0.005):
 130     """
 131     Given a list with file sizes, the dvd size and the acceptable slack
 132     this routine distributes the selection over several dvs.
 133 
 134     Parameters:
 135     - files: list with file sizes
 136     - dvdsz: dvd size (single side)
 137     - slack: maximum acceptable error when calculating the selection (default=50)
 138     - itermax: maximum number of iteration (default=2000)
 139     - beta: sigmoid parameter (default 0.005, empirically determined)
 140 
 141     Returns:
 142     - indexes of selected files per dvds:
 143       [ [idx1, idx2, ..., idxn], [idx1, idx2, ..., idxm], ..., [idx1, idx2, ..., idxk] ]
 144     - compilation sizes: [ size1, size2, ..., sizen ]      
 145     """
 146     # first create a dictionary with all file sizes indexed. Files choosen
 147     # by dvdSplit will be removed from this dictionary until it becomes empty
 148     filesdic = dict()
 149     n = len(files)
 150     for i in range(n):
 151         filesdic[i] = files[i]
 152     dvds = []
 153     sz = []
 154     running = True
 155     r = 1
 156     while(running):    
 157         v = list(filesdic.values())
 158         k = list(filesdic.keys())
 159         print("Starting DVD %02d [%3.2f%% completed]" % (r,100-100*len(v)/n))
 160         maxtries = 50
 161         while(True):
 162             sel,ns,dsz,iter = dvdSplit(v,dvdsz,slack,itermax,beta)
 163             if dsz < dvdsz:
 164                 break;
 165             maxtries = maxtries - 1
 166             if(maxtries == 0):
 167                 print("Could not find a good solution for DVD %d in 50 attempts. Try again, please." % (r))
 168                 sys.exit(1)
 169         # getting indexes
 170         r = r + 1
 171         d = []
 172         for s in sel: 
 173             d.append(k[s])
 174             del filesdic[k[s]]
 175         # saving dvd compilation
 176         dvds.append(d)
 177         sz.append(dsz)
 178         if len(ns) == 0:
 179             running = False
 180 
 181     return dvds, sz
 182 
 183 def dvdConsistency(filenames,filesizes,dvdsz):
 184     """
 185     Consistency checks.
 186 
 187     Parameters:
 188     - filenames: file names dict
 189     - filesizes: file sizes dict
 190     - dvds: dvd size
 191     """
 192     # look for any file bigger than dvd side
 193     for i in filesizes:
 194         if filesizes[i] >= dvdsz:
 195             print("Size %d is bigger than dvd size %d" % (filesizes[i],dvdsz))
 196             sys.exit(1)
 197 
 198 def printDvdLayout(filesizes,filenames,dvdsz,dvds,s):
 199     """
 200     Print results
 201     """
 202     i = 0
 203     n = len(dvds)    
 204     tts = []
 205     print("\nSummary:")
 206     for dvd in dvds:
 207         tt = []
 208         i = i + 1
 209         for track in dvd:
 210             tt.append(filesizes[track])
 211         tts.append(sum(tt))
 212         print("[DVD %d/%d, SIZE %dMBs, USAGE %2.2f%%]" % (i,n,tts[i-1],100.0*tts[i-1]/dvdsz))
 213             
 214     i = 0
 215     print("\nFile seletion per DVD:")
 216     for dvd in dvds:
 217         i = i + 1
 218         print("[DVD %d/%d, SIZE %dMBs]" % (i,n,tts[i-1]))
 219         for track in dvd:
 220             files = filenames[track].split(',')
 221             files.sort()
 222             for f in files:
 223                 print("\t%s" % (f))
 224         
 225 def buildDvds(basedir,dvdsz,slack):
 226     """
 227     Get the files list and call optimization routines
 228     """
 229     filelist = os.listdir(basedir)
 230     filenames = dict()
 231     filesizes = dict()
 232     i = 0
 233     print("\nFiles at base directory %s (DVD size = %d, slack = %d)" % (basedir,dvdsz,slack))
 234     for f in filelist:
 235         filename = basedir+f
 236         if os.path.isfile(filename):
 237             filenames[i] = f
 238             # in MB, rounded up
 239             filesizes[i] = int(os.path.getsize(filename)/1024.0**2 + 1)
 240             print("[%5.2fMB ] %s" % (filesizes[i],filenames[i]))
 241             i = i + 1
 242 
 243     print("\nChecking consistency ...")
 244     dvdConsistency(filenames,filesizes,dvdsz)
 245     dvds, s = dvdsSplit(list(filesizes.values()),dvdsz,slack)
 246     printDvdLayout(filesizes,filenames,dvdsz,dvds,s)
 247 
 248 def main():
 249 
 250     usage = "Usage: %prog [options]"
 251     cmdline = OptionParser(usage)
 252     cmdline.add_option("-d", "--dir",  action="store", dest="basedir", type="string", default=".",    metavar="DIR",      help="Directory where files are stored")
 253     cmdline.add_option("-s", "--size", action="store", dest="dvdsz",   type="int",    default="4500", metavar="SIZE",     help="DVD size (MB)")
 254     cmdline.add_option("-l", "--slack",action="store", dest="slack",   type="int",    default="50",   metavar="SLACK",    help="Maximum acceptable error (MB)")
 255     
 256     (options, args) = cmdline.parse_args()
 257 
 258     if(os.path.isdir(options.basedir) == False):
 259         print("%s is not a valid directory" %(options.basedir))
 260         sys.exit(1)
 261 
 262     options.basedir.strip()
 263     basedir = os.path.normpath(options.basedir)
 264     if(os.path.isdir("c:\\")):
 265         basedir = basedir + "\\"
 266     else:
 267         basedir = basedir + "//"
 268 
 269     #buildDvdsPrx = psyco.proxy(buildDvds)
 270     #buildDvdsPrx(basedir,int(options.dvdsz),int(options.slack))
 271     buildDvds(basedir,int(options.dvdsz),int(options.slack))
 272     
 273 if __name__ == "__main__":
 274 
 275     main()

Volta para CookBook.


Marcelo Barros