Este script permite que uma série de arquivos sejam organizados em vários DVDs/CDs de forma otimizada, isto é, tentando sempre utilizar ao máximo o espaço disponível na mídia. É útil quando você não quer pensar muito na melhor organização e eu venho usado para organizar os meus arquivos de backup. Ao final, ele lista uma opção de organização. Como o algoritmo é estatístico, você pode rodar novamente para ter um resultado diferente, apesar de construído com os mesmos critérios.
Este programa usa uma rede neural do tipo Hopfield para a solução do problema. Basicamente é necessário especificar o diretório onde os arquivos estão, o tamanho da mídia e a folga máxima. Se nenhum parâmetro é especificado, é usado o diretório corrente, uma mídia de 4500MB (DVD) e uma folga de 50MB. A folga diz o quanto o algoritmo pode encarar margem de erro, já que em geral não é possível ter uma divisão sempre perfeita.
Rode com dvdsplit.py -h para informações da linha de comando. É meu primeiro programa em Python, comentário são bem vindos.
Código
1 """
2 DVD Split, by Marcelo Barros (marcelobarrosalmeida(at)gmail.com)
3
4 March/17/2016 - modifications for python 3, removing psyco dependence (obsolete)
5 Sept/13/2005 - original version
6 $Id: dvdsplit.py,v 1.1 2005/09/13 14:54:36 barros Exp $
7 """
8 import random
9 import math
10 import os
11 import sys
12 from optparse import OptionParser
13 #import psyco
14
15 def currentSize(files, selected):
16 """
17 Calculate the size of the current selection.
18
19 Parameters:
20 - files: list with file sizes
21 - selected: list with 1 or -1, indicating if the file was selected or not
22
23 Returns:
24 - the current selection size
25 """
26 sz = 0
27 for i in range(len(files)):
28 if(selected[i] > 0):
29 sz = sz + files[i]
30 return sz
31
32 def energyFun(files,selected,dvdsz):
33 """
34 Energy function used. In this case is:
35 f(x) = |current_size - dvdsize |
36
37 Parameters:
38 - files: list with file sizes
39 - selected: list with 1 or -1, indicating if the file was selected or not
40 - dvdsz: dvd size
41
42 Returns:
43 - energy function value for the current selection
44 """
45 energ = 0
46 for i in range(len(files)):
47 if selected[i] > 0:
48 energ = energ + files[i]
49 energ = abs(energ - dvdsz)
50 return energ
51
52 def dvdInitialGuess(files,selected,dvdsz):
53 """
54 Just provide an initial random guess to the algorithm
55
56 Parameters:
57 - files: list with file sizes
58 - selected: list with 1 or -1, indicating if the file was selected or not
59 - dvdsz: dvd size
60 """
61 l = len(files)
62 idx = range(l)
63 idx = random.sample(idx,l)
64
65 t = 0
66 for i in range(l):
67 j = idx[i]
68 t = t + files[j]
69 selected[j] = 1
70 if t >= dvdsz:
71 break
72
73 def dvdSplit(files,dvdsz,slack=50,itermax=3000,beta=0.005):
74 """
75 Given a list with file sizes, the dvd size and the acceptable slack
76 this routine gives a subset that fits in one dvd.
77
78 Parameters:
79 - files: list with file sizes
80 - dvdsz: dvd size (single side)
81 - slack: maximum acceptable error when calculating the selection
82 - itermax: maximum number of iteration (default=2000)
83 - beta: sigmoid parameter (default 0.005, empirically determined)
84
85 Returns:
86 - indexes of selected files: [ idx1, idx2, ..., idxn ]
87 - indexes of not selected files: [ idx1, idx2, ..., idxn ]
88 - current selection size
89 - iteration used to calculate
90 """
91 running = True
92 iter = 0
93 numfiles = len(files)
94 selected = [-1 for x in range(numfiles)]
95 dvdInitialGuess(files,selected,dvdsz)
96
97 sz = currentSize(files,selected) # maximum size must be greather than dvdsize
98
99 if(sz > dvdsz):
100 while(running and iter < itermax):
101 # current energy
102 e1 = energyFun(files,selected,dvdsz)
103 # change state for random node
104 n = random.randint(0,numfiles-1)
105 selected[n] = -selected[n]
106 # new energy
107 e2 = energyFun(files,selected,dvdsz)
108 # evaluate the change against sigmoid and accept it or not
109 # according to a uniform probability
110 sig = (1/( 1 + math.exp(beta*(e2-e1))))
111 if sig < random.uniform(0,1):
112 selected[n] = -selected[n] # state not accepted
113 sz = currentSize(files,selected)
114 # testing exit condition: (dvdsz - slack) < currentSize < dvdsz
115 if ( (sz >= (dvdsz - slack)) and (sz < dvdsz) ):
116 running = False
117 iter = iter + 1
118
119 sel = []
120 notsel = []
121 for i in range(numfiles):
122 if selected[i] > 0:
123 sel.append(i)
124 else:
125 notsel.append(i)
126 #print "Solution found in %d iterations. Compilation size is %d (%d files)" % (iter,sz,len(sel))
127 return sel,notsel,sz,iter
128
129 def dvdsSplit(files,dvdsz,slack=50,itermax=3000,beta=0.005):
130 """
131 Given a list with file sizes, the dvd size and the acceptable slack
132 this routine distributes the selection over several dvs.
133
134 Parameters:
135 - files: list with file sizes
136 - dvdsz: dvd size (single side)
137 - slack: maximum acceptable error when calculating the selection (default=50)
138 - itermax: maximum number of iteration (default=2000)
139 - beta: sigmoid parameter (default 0.005, empirically determined)
140
141 Returns:
142 - indexes of selected files per dvds:
143 [ [idx1, idx2, ..., idxn], [idx1, idx2, ..., idxm], ..., [idx1, idx2, ..., idxk] ]
144 - compilation sizes: [ size1, size2, ..., sizen ]
145 """
146 # first create a dictionary with all file sizes indexed. Files choosen
147 # by dvdSplit will be removed from this dictionary until it becomes empty
148 filesdic = dict()
149 n = len(files)
150 for i in range(n):
151 filesdic[i] = files[i]
152 dvds = []
153 sz = []
154 running = True
155 r = 1
156 while(running):
157 v = list(filesdic.values())
158 k = list(filesdic.keys())
159 print("Starting DVD %02d [%3.2f%% completed]" % (r,100-100*len(v)/n))
160 maxtries = 50
161 while(True):
162 sel,ns,dsz,iter = dvdSplit(v,dvdsz,slack,itermax,beta)
163 if dsz < dvdsz:
164 break;
165 maxtries = maxtries - 1
166 if(maxtries == 0):
167 print("Could not find a good solution for DVD %d in 50 attempts. Try again, please." % (r))
168 sys.exit(1)
169 # getting indexes
170 r = r + 1
171 d = []
172 for s in sel:
173 d.append(k[s])
174 del filesdic[k[s]]
175 # saving dvd compilation
176 dvds.append(d)
177 sz.append(dsz)
178 if len(ns) == 0:
179 running = False
180
181 return dvds, sz
182
183 def dvdConsistency(filenames,filesizes,dvdsz):
184 """
185 Consistency checks.
186
187 Parameters:
188 - filenames: file names dict
189 - filesizes: file sizes dict
190 - dvds: dvd size
191 """
192 # look for any file bigger than dvd side
193 for i in filesizes:
194 if filesizes[i] >= dvdsz:
195 print("Size %d is bigger than dvd size %d" % (filesizes[i],dvdsz))
196 sys.exit(1)
197
198 def printDvdLayout(filesizes,filenames,dvdsz,dvds,s):
199 """
200 Print results
201 """
202 i = 0
203 n = len(dvds)
204 tts = []
205 print("\nSummary:")
206 for dvd in dvds:
207 tt = []
208 i = i + 1
209 for track in dvd:
210 tt.append(filesizes[track])
211 tts.append(sum(tt))
212 print("[DVD %d/%d, SIZE %dMBs, USAGE %2.2f%%]" % (i,n,tts[i-1],100.0*tts[i-1]/dvdsz))
213
214 i = 0
215 print("\nFile seletion per DVD:")
216 for dvd in dvds:
217 i = i + 1
218 print("[DVD %d/%d, SIZE %dMBs]" % (i,n,tts[i-1]))
219 for track in dvd:
220 files = filenames[track].split(',')
221 files.sort()
222 for f in files:
223 print("\t%s" % (f))
224
225 def buildDvds(basedir,dvdsz,slack):
226 """
227 Get the files list and call optimization routines
228 """
229 filelist = os.listdir(basedir)
230 filenames = dict()
231 filesizes = dict()
232 i = 0
233 print("\nFiles at base directory %s (DVD size = %d, slack = %d)" % (basedir,dvdsz,slack))
234 for f in filelist:
235 filename = basedir+f
236 if os.path.isfile(filename):
237 filenames[i] = f
238 # in MB, rounded up
239 filesizes[i] = int(os.path.getsize(filename)/1024.0**2 + 1)
240 print("[%5.2fMB ] %s" % (filesizes[i],filenames[i]))
241 i = i + 1
242
243 print("\nChecking consistency ...")
244 dvdConsistency(filenames,filesizes,dvdsz)
245 dvds, s = dvdsSplit(list(filesizes.values()),dvdsz,slack)
246 printDvdLayout(filesizes,filenames,dvdsz,dvds,s)
247
248 def main():
249
250 usage = "Usage: %prog [options]"
251 cmdline = OptionParser(usage)
252 cmdline.add_option("-d", "--dir", action="store", dest="basedir", type="string", default=".", metavar="DIR", help="Directory where files are stored")
253 cmdline.add_option("-s", "--size", action="store", dest="dvdsz", type="int", default="4500", metavar="SIZE", help="DVD size (MB)")
254 cmdline.add_option("-l", "--slack",action="store", dest="slack", type="int", default="50", metavar="SLACK", help="Maximum acceptable error (MB)")
255
256 (options, args) = cmdline.parse_args()
257
258 if(os.path.isdir(options.basedir) == False):
259 print("%s is not a valid directory" %(options.basedir))
260 sys.exit(1)
261
262 options.basedir.strip()
263 basedir = os.path.normpath(options.basedir)
264 if(os.path.isdir("c:\\")):
265 basedir = basedir + "\\"
266 else:
267 basedir = basedir + "//"
268
269 #buildDvdsPrx = psyco.proxy(buildDvds)
270 #buildDvdsPrx(basedir,int(options.dvdsz),int(options.slack))
271 buildDvds(basedir,int(options.dvdsz),int(options.slack))
272
273 if __name__ == "__main__":
274
275 main()
Volta para CookBook.
Marcelo Barros