Re: Image downloader

Hello! I do not speak English very well because I am from Moscow, so please excuse me. Some time ago I tried to find a program that downloads all images from a site, but I could not find one anywhere. HTTrack does something like that, but it did not work properly for me. So I tried to write my own program in Python with a wxPython GUI. The code is below. The program has to be started several times to complete a full download. I will try to answer all your questions.
import httplib
import os
import re
import socket
from os.path import splitext
from time import sleep
from urllib import urlretrieve
from urlparse import urljoin,urlparse,urlunparse

import wx

class LogFile(object):
    """A set of strings that is persisted to ``<name>.log``, one entry per line.

    On construction the log file is loaded when it is readable; otherwise the
    set starts from ``LogSet``.  Nothing is written until ``WriteLog()``.

    The class used to derive from ``file`` without ever initialising the
    base, so none of the inherited file methods were usable; it is a plain
    object now.
    """

    def __init__(self,name,LogSet=()):
        """Bind to ``name + '.log'`` and load it, or start from ``LogSet``."""
        self.FileName=name+'.log'
        if os.access(self.FileName,os.R_OK):
            self.ReadLog()
        else:
            self.set=set(LogSet)

    def WriteLog(self):
        """Overwrite the log file with the current entries."""
        # ``with`` closes the handle even when the write fails.
        with open(self.FileName,'w') as f:
            f.write('\n'.join(self.set))

    def ReadLog(self):
        """Replace the in-memory set with the entries stored in the log file."""
        with open(self.FileName,'r') as f:
            self.set=set(line.strip('\n\r') for line in f)
        # A blank line is not an entry; keeping '' would later be popped as
        # if it were a URL.
        self.set.discard('')

    def add(self,item):
        """Add a single entry."""
        self.set.add(item)

    def join(self,AddLog):
        """Add every entry of another LogFile or of a plain iterable."""
        self.set.update(AddLog.set if hasattr(AddLog,'set') else AddLog)

    def rem(self,RemLog):
        """Remove every entry of another LogFile or of a plain iterable."""
        self.set.difference_update(
            RemLog.set if hasattr(RemLog,'set') else RemLog)

    def size(self):
        """Return the number of entries."""
        return len(self.set)

    def pop(self):
        """Remove and return an arbitrary entry; raises KeyError when empty."""
        return self.set.pop()

    def GetLog(self):
        """Return the underlying set (not a copy)."""
        return self.set

def OnError(FoundedA,CurrentURL,ImagesLinks,ImageURL):
    """Put a failed page back into the page queue, and its image if any.

    ``CurrentURL`` is always re-added to ``FoundedA``; ``ImageURL`` is
    re-added to ``ImagesLinks`` unless it is the empty string.
    """
    FoundedA.add(CurrentURL)
    if ImageURL=='':
        return
    ImagesLinks.add(ImageURL)

def message(mess):
    """Show ``mess`` in a modal dialog owned by the global ``frame``."""
    box=wx.MessageDialog(frame,mess)
    # Blocks until the user dismisses the dialog.
    box.ShowModal()
    box.Destroy()

def OnClickOK(event):
    """Crawl up to 1000 queued pages of the entered site and save their images.

    State is kept in four LogFile sets (``founded``, ``visited``, ``images``,
    ``saved``) plus ``compare.log`` (saved file name -> page it came from),
    all in the current directory.  Links discovered on a page are only queued
    immediately when that page yielded at least one image; the rest are
    queued when the run ends, which is why several runs may be needed.

    Reads the module-level widgets ``TextURL``, ``ImagesDisp``, ``SavedDisp``,
    ``VisitedDisp``, ``FoundedDisp`` and the global ``app``.

    The logs are written and ``compare.log`` is closed in a ``finally`` block
    so progress survives an unexpected exception.
    """
    referer=TextURL.GetValue()
    MainURL=urlparse(referer)[1]
    FoundedA=LogFile('founded',set(('/',)))
    VisitedA=LogFile('visited')
    ImagesLinks=LogFile('images')
    SavedImages=LogFile('saved')
    compare=open('compare.log','a')
    # Transient failures, checked in this order: the page (and the image in
    # flight, if any) is queued again and the crawl pauses for a minute.
    RetryErrors=((httplib.CannotSendRequest,'Cannot send request'),
                 (httplib.BadStatusLine,'Bad Status Line'),
                 (socket.error,'Socket error'))
    RetryTypes=tuple(pair[0] for pair in RetryErrors)
    FullImageURL=''
    NewFoundedA=set()
    try:
        FoundedA.rem(VisitedA)
        for n in xrange(1000):
            if FoundedA.size()==0:
                # Nothing left to visit in this run.
                break
            CurrentURL=FoundedA.pop()
            ImageFound=False
            try:
                SetA,SetImages=GetURLsSets(CurrentURL,MainURL,referer)
                NewFoundedA.update(SetA)
                NewFoundedA.difference_update(VisitedA.GetLog())
                NewFoundedA.discard(CurrentURL)
                SetImages.difference_update(SavedImages.GetLog())
                ImagesLinks.join(SetImages)
                while ImagesLinks.size()>0:
                    ImageFound=True
                    FullImageURL=ImagesLinks.pop()
                    ParsedImageURL=urlparse(FullImageURL)
                    ImageURL=urlunparse(('',)+ParsedImageURL[1:])
                    FileName='%i%s' % (SavedImages.size(),ImageURL.replace('/','-'))
                    if ParsedImageURL[1]=='':
                        # Host-relative image: fetch it from the main host.
                        print('URL:%s, ImageURL:%s' % (CurrentURL,ImageURL))
                        conn=httplib.HTTPConnection(MainURL)
                        try:
                            conn.request('GET',ImageURL)
                            data=conn.getresponse().read()
                        finally:
                            conn.close()
                        # Written only after a complete read, so a failed
                        # request no longer leaves an empty file behind.
                        with open(FileName,'wb') as f:
                            f.write(data)
                    else:
                        print('URL:%s, ImageURL:%s' % (CurrentURL,FullImageURL))
                        urlretrieve(FullImageURL,FileName)
                    compare.write('%s %s\n' % (FileName,CurrentURL))
                    SavedImages.add(FullImageURL)
                    ImagesDisp.SetLabel('%i' % (ImagesLinks.size(),))
                    SavedDisp.SetLabel('%i' % (SavedImages.size(),))
                    app.Yield()
                FullImageURL=''
                VisitedA.add(CurrentURL)
                if ImageFound:
                    FoundedA.join(NewFoundedA)
                VisitedDisp.SetLabel('%i' % (VisitedA.size(),))
                FoundedDisp.SetLabel('%i' % (FoundedA.size(),))
                app.Yield()
            except RetryTypes as error:
                ErrorMess=[text for kind,text in RetryErrors
                           if isinstance(error,kind)][0]
                print('Error:%s, URL:%s' % (ErrorMess,CurrentURL))
                OnError(FoundedA,CurrentURL,ImagesLinks,FullImageURL)
                sleep(60)
            except IOError:
                # The image is marked as saved so it is not attempted again;
                # the page itself is queued again.
                print('Error:%s, URL:%s' % ('IOError',FullImageURL))
                if FullImageURL!='':
                    SavedImages.add(FullImageURL)
                FoundedA.add(CurrentURL)
                sleep(60)
    finally:
        FoundedA.join(NewFoundedA)
        FoundedA.WriteLog()
        VisitedA.WriteLog()
        ImagesLinks.WriteLog()
        SavedImages.WriteLog()
        compare.close()
    message('Download complete')

def OnClickSetDir(event):
    """Let the user pick a directory and make it the working directory."""
    chooser=wx.DirDialog(frame)
    accepted=chooser.ShowModal()==wx.ID_OK
    if accepted:
        os.chdir(chooser.GetPath())
    chooser.Destroy()

def GetURLsSets(URL,MainURL,referer):
    """Fetch one page and return ``(page_links, image_links)`` as two sets.

    ``URL`` is requested from host ``MainURL`` with ``referer`` as the
    Referer header.  Anchors with no scheme or the ``http`` scheme are
    reduced to their path part and joined onto ``URL``.  Anchors that point
    at an image file are moved from the page set to the image set, next to
    the ``<img>``/``<image>`` sources found in the body.

    Returns two empty sets for any status other than 200 and when parsing
    raises AttributeError or UnicodeDecodeError.  The connection is closed
    on every path.
    """
    conn=httplib.HTTPConnection(MainURL)
    try:
        conn.request('GET',URL,headers={'Referer':referer})
        r=conn.getresponse()
        status=r.status
        print('URL:%s, Status:%s' % (URL,status))
        if status!=200:
            return (set(),set())
        body=r.read()
        hrefs=re.findall(
            '<\s*a[^>]*?(?<=href)\s*=\s*[\'|\"]'
            '([^\'\"]*?)[\'|\"][^>]*?>',body,re.I)
        SetA=set()
        for parts in map(urlparse,hrefs):
            if parts[0] in ('','http'):
                # Drop scheme and host so the link is resolved against URL.
                local=urlunparse(('','')+parts[2:])
                SetA.add(urljoin(URL,local).replace('../',''))
        sources=re.findall('<\s*(?:img|image)[^>]*?(?<=src)\s*=\s*[\'|\"]'
                           '([^\'\"]*?(?:jpg|jpeg|gif|png|tif|tiff))'
                           '[\'|\"][^>]*?>',body,re.I)
        SetImg=set(urljoin(URL,src).replace('../','') for src in sources)
        ImageExts=('.jpg','.jpeg','.gif','.png','.tif','.tiff')
        SetMov=set(link for link in SetA
                   if splitext(link)[-1].lower() in ImageExts)
        return (SetA-SetMov,SetImg|SetMov)
    except AttributeError:
        print('Error:%s, URL:%s' % ('AttributeError',URL))
        return (set(),set())
    except UnicodeDecodeError:
        print('Error:%s, URL:%s' % ('UnicodeDecodeError',URL))
        return (set(),set())
    finally:
        conn.close()

# --- Start-up script: build the single window and run the event loop -------
# The names bound here (app, frame, TextURL and the four *Disp labels) are
# module-level globals that the handler functions above read directly.
app=wx.App()
frame=wx.Frame(None,-1,'Image Downloader 2')
frame.SetSizer(wx.BoxSizer(wx.VERTICAL))
panel=wx.Panel(frame,-1)
frame.GetSizer().Add(panel)
box=wx.BoxSizer(wx.VERTICAL)
panel.SetSizer(box)
# Counter grid: a caption followed by the label that shows the live value.
grid=wx.FlexGridSizer(4,2)
box.Add(grid)
grid.Add(wx.StaticText(panel,-1,'Visited links'),0,wx.ALL,5)
VisitedDisp=wx.StaticText(panel,-1,'0',size=(100,-1))
grid.Add(VisitedDisp,0,wx.ALL,5)
grid.Add(wx.StaticText(panel,-1,'Founded or left links'),0,wx.ALL,5)
FoundedDisp=wx.StaticText(panel,-1,'0',size=(100,-1))
grid.Add(FoundedDisp,0,wx.ALL,5)
grid.Add(wx.StaticText(panel,-1,'Founded or left images'),0,wx.ALL,5)
ImagesDisp=wx.StaticText(panel,-1,'0',size=(100,-1))
grid.Add(ImagesDisp,0,wx.ALL,5)
grid.Add(wx.StaticText(panel,-1,'Saved images'),0,wx.ALL,5)
SavedDisp=wx.StaticText(panel,-1,'0',size=(100,-1))
grid.Add(SavedDisp,0,wx.ALL,5)
# Start URL entry; OnClickOK reads it with TextURL.GetValue().
box.Add(wx.StaticText(panel,-1,'Input URL in text window below'))
TextURL=wx.TextCtrl(panel,-1)
box.Add(TextURL,0,wx.EXPAND)
# Directory chooser button (handled by OnClickSetDir).
box.Add(wx.StaticText(panel,-1,'Select directory to download by button below'))
SetDir=wx.Button(panel,wx.ID_OPEN)
box.Add(SetDir,0,wx.ALIGN_CENTER)
SetDir.Bind(wx.EVT_BUTTON,OnClickSetDir)
# OK button starts a crawl run (handled by OnClickOK).
OK=wx.Button(panel,wx.ID_OK)
box.Add(OK,0,wx.ALIGN_CENTER)
OK.Bind(wx.EVT_BUTTON,OnClickOK)
frame.Fit()
frame.Show()
app.MainLoop()

Comments

  • jesse eisenberg needed that

    <removed link>
  • This is a new version of this program with a new interface and many options. This post is long, so I will continue in the next one.
    from httplib import BadStatusLine,CannotSendRequest,\
         HTTPConnection,IncompleteRead
    from os import access,chdir,getcwd,mkdir,F_OK,R_OK
    from os.path import splitext
    from re import findall,I
    from urlparse import urljoin,urlparse,urlunparse
    import socket,wx
    def JoinLogs(*logs):
        """Return the union of all given logs as a new set.

        Each argument is either a set or an object exposing its entries
        through a ``set`` attribute (such as LogFile).
        """
        merged=set()
        for log in logs:
            members=log.set if hasattr(log,'set') else log
            merged=merged|members
        return merged
    class LogFile:
        """A set of strings persisted to ``<name>.log``, one entry per line.

        The log file is loaded on construction when it is readable;
        otherwise the set starts from ``LogSet``.  Nothing is written until
        ``WriteLog()`` is called.
        """

        def __init__(self,name,LogSet=()):
            """Bind to ``name + '.log'`` and load it, or start from ``LogSet``."""
            self.FileName=name+'.log'
            if access(self.FileName,R_OK):
                self.ReadLog()
            else:
                self.set=set(LogSet)

        def add(self,item):
            """Add a single entry."""
            self.set.add(item)

        def join(self,*AddLogs):
            """Add the entries of every given log (LogFile or set)."""
            self.set.update(JoinLogs(*AddLogs))

        def GetLog(self):
            """Return the underlying set (not a copy)."""
            return self.set

        def pop(self):
            """Remove and return an arbitrary entry, or '' when empty."""
            return self.set.pop() if len(self.set)>0 else ''

        def rem(self,*RemLogs):
            """Remove the entries of every given log (LogFile or set)."""
            self.set.difference_update(JoinLogs(*RemLogs))

        def size(self):
            """Return the number of entries."""
            return len(self.set)

        def WriteLog(self):
            """Overwrite the log file with the current entries."""
            # ``with`` closes the handle even when the write fails.
            with open(self.FileName,'w') as f:
                f.write('\n'.join(self.set))

        def ReadLog(self):
            """Replace the in-memory set with the entries stored on disk."""
            with open(self.FileName,'r') as f:
                self.set=set(line.strip('\n\r') for line in f)
            # A blank line is not an entry.
            self.set.discard('')
    def FileNameFromURL(URL):
        """Flatten a URL into a file-name fragment.

        ``http://`` becomes ``-`` and every remaining ``/`` becomes ``-``.
        """
        without_scheme=URL.replace('http://','-')
        return without_scheme.replace('/','-')
    def GetBody(window,URL,key):
        """Request ``URL`` and return ``(status, body)``.

        ``key`` is ``'link'`` or ``'img'`` and selects which bad-URL set of
        ``window`` (``BadA`` or ``BadImages``) this request is charged to;
        the set and the URL are registered with ``window.control`` before
        the request so an error handler can record the failure.

        ``body`` is the response body for 200, the ``Location`` header for
        301/302, ``''`` for 400-404 (the URL is added to the bad set) and a
        ``'Status N returned'`` text otherwise.

        Exceptions from httplib/socket propagate to the caller; the
        connection is closed either way.
        """
        BadLinkSet={'link':window.BadA,'img':window.BadImages}[key]
        window.control.settle(BadLinkSet,URL)
        PartsURL=urlparse(URL)
        conn=HTTPConnection(PartsURL[1])
        try:
            conn.request('GET',urlunparse(('','')+PartsURL[2:]),
                         headers={'Referer':urlunparse(PartsURL[:2]+('',)*4),
                                  'User-Agent':window.TextUA})
            r=conn.getresponse()
            status=r.status
            if status==200:
                body=r.read()
                if len(body)==0:
                    print('  0 bytes got from %s' % URL)
                    if not window.SuppressAll.GetValue():
                        window.message('Zero body length.')
            elif status in (301,302):
                body=r.getheader('location')
            elif status in (400,401,402,403,404):
                BadLinkSet.add(URL)
                if not window.SuppressAll.GetValue():
                    window.message('Bad %s URL.' % ({'link':'link','img':'image'}[key],))
                body=''
            else:
                body='Status %s returned' % status
        finally:
            # Previously the socket stayed open whenever a call above raised.
            conn.close()
        return (status,body)
    def IsSubDomain(URL,MainURL):
        """Return True when ``URL`` is on the host of ``MainURL`` or below it.

        The comparison is case-insensitive and respects the label boundary:
        ``img.example.com`` matches ``example.com`` but ``notexample.com``
        does not (a bare suffix test used to accept it).

        When ``MainURL`` has no host part every URL is accepted, as before.
        """
        host=urlparse(URL)[1].lower()
        main=urlparse(MainURL)[1].lower()
        if main=='':
            return True
        return host==main or host.endswith('.'+main)
    def RemoveWWW(URL):
        """Return ``URL`` with a leading ``www.`` removed from its host.

        Only the exact prefix ``www.`` (any letter case) is removed.  The
        earlier ``lstrip('Ww')`` removed every leading "w", which turned
        ``wikipedia.org`` into ``ikipedia.org``.
        """
        PartsURL=urlparse(URL)
        host=PartsURL[1]
        if host[:4].lower()=='www.':
            host=host[4:]
        return urlunparse((PartsURL[0],host)+PartsURL[2:])
    def StrToTuple(text):
        """Split a comma-separated string into a tuple of stripped items."""
        return tuple(item.strip() for item in text.split(','))
    def ExtStr(text):
        """Build the regex that captures quoted values ending in an extension.

        ``text`` is a comma-separated list of extensions; each one is
        prefixed with an escaped dot and they are joined as alternatives.
        """
        alternatives=['\.'+ext for ext in StrToTuple(text)]
        joined='|'.join(alternatives)
        return '[\'|\"]([^\'\"]*?(?:%s))\s*[\'|\"]' % (joined,)
    def ExeptTuple(text):
        """Turn a comma-separated extension list into ``('.ext', ...)``."""
        return tuple('.'+ext for ext in StrToTuple(text))
    def GetURLsSets(window,URL,MainURL):
        """Fetch ``URL`` and return ``(page_links, image_links)`` as sets.

        For status 200 the page links are the anchors that pass
        ``IsSubDomain`` and whose extension is not in ``window.ExeptTuple``;
        the image links are whatever ``window.ExtStr`` matches in the body.
        For 301/302 the redirect target is returned as the only page link
        when it passes ``IsSubDomain``.  Every other case gives two empty
        sets.
        """
        def prepare(found):
            # Decode and strip each raw match, then resolve it against URL.
            usable=[]
            for raw in found:
                try:
                    usable.append(raw.decode().strip())
                except UnicodeDecodeError as BadStr:
                    print('  Bad link: %s' % (BadStr.object,))
                    if not (window.DecodeSuppress.GetValue()
                            or window.SuppressAll.GetValue()):
                        window.message('UnicodeDecodeError. Bad link removed.')
            return [urljoin(URL,item).replace('../','') for item in usable]

        status,body=GetBody(window,URL,'link')
        print('  URL: %s, Status: %s' % (URL,status))
        if status==200:
            anchors=findall('<\s*a[^>]*?(?<=href)\s*=\s*[\'|\"]'
                            '\s*([^\'\"]*?)\s*[\'|\"]',body,I)
            SetA=set(link for link in prepare(anchors)
                     if IsSubDomain(link,MainURL)
                     and splitext(link)[-1].lower() not in window.ExeptTuple)
            SetImg=set(prepare(findall(window.ExtStr,body,I)))
            return (SetA,SetImg)
        if status in (301,302):
            # For redirects GetBody returns the Location header as body.
            target=urljoin(URL,body)
            if not IsSubDomain(target,MainURL):
                return (set(),set())
            print('  Redirect to URL: %s' % (target,))
            return (set((target,)),set())
        return (set(),set())
    def MakeIfNE(path):
        """Create ``path`` if it does not exist, chdir into it, return it."""
        already_there=access(path,F_OK)
        if not already_there:
            mkdir(path)
        chdir(path)
        return path
    def message(self,mess,style=wx.OK):
        """Show ``mess`` in a modal 'Information' dialog; return the button id."""
        box=wx.MessageDialog(self,mess,'Information',style)
        answer=box.ShowModal()
        box.Destroy()
        return answer
    class control:
        """Remembers the request in flight so a failure can be recorded.

        ``settle()`` is called before a request with the URL and the set
        that collects bad URLs of that kind; ``handle()`` is called from an
        error handler and adds the remembered URL to the remembered set.
        """

        def __init__(self):
            # Safe defaults: handle() before the first settle() used to
            # raise AttributeError from inside an exception handler.
            self.BadLinkSet=set()
            self.URL=''

        def settle(self,BadLinkSet,URL):
            """Remember which URL is being requested and where to record it."""
            self.BadLinkSet=BadLinkSet
            self.URL=URL

        def handle(self,ErrorMess):
            """Print the error and add the remembered URL to its bad set."""
            print('  Error: %s, URL: %s' % (ErrorMess,self.URL))
            self.BadLinkSet.add(self.URL)
    # Short aliases for the wx sizer flags and orientations used by MainWin.
    # Combined flags: expand / centre / centre vertically, each with a
    # border on all sides.
    EA=wx.EXPAND|wx.ALL
    CA=wx.ALIGN_CENTER|wx.ALL
    CVA=wx.ALIGN_CENTER_VERTICAL|wx.ALL
    # Plain one-to-one aliases.
    VER=wx.VERTICAL
    HOR=wx.HORIZONTAL
    EXP=wx.EXPAND
    AC=wx.ALIGN_CENTER
    AR=wx.ALIGN_RIGHT
    class MainWin(wx.Frame):
        """Main window of the downloader: options, counters and the crawl loop.

        All widgets are created in ``__init__`` and most of them are kept as
        local variables there; the event handlers are closures defined inside
        ``__init__`` so that they can reach those locals.  Only the values
        that module-level helpers need are stored on ``self`` (``ExtStr``,
        ``ExeptTuple``, ``TextUA``, ``BadA``, ``BadImages``, ``proceed``,
        ``DecodeSuppress``, ``SuppressAll``).
        """
        # The module-level message() function doubles as a method.
        message=message
        # One shared control instance (class attribute, not per window).
        control=control()
        def __init__(self):
            """Build the whole GUI, bind the handlers and show the window."""
            def AboutDisplay(event):
                """Fill in the About information and show the About box."""
                info=wx.AboutDialogInfo()
                info.AddDeveloper('Patrashov Alexey Sergeevich, Moscow, RF')
                info.SetCopyright('(C) All right reserved.')
                info.SetDescription(
                    'The utility for download of all images from the site. '
                    'Intuitive interface and the ability to modification '
                    'program by self. Program has many log files and options.')
                info.SetLicense('GNU GPL license.')
                info.SetName('Image Downloader')
                info.SetVersion('0.0.1')
                info.SetWebSite(('mailto:alex-wolf-75@mail.ru',
                                'alex-wolf-75@mail.ru'))
                wx.AboutBox(info)
            def OnClickSetDir(event):
                """Choose the download directory and chdir into it."""
                dialog=wx.DirDialog(self)
                # The directory currently typed in the field is created (if
                # missing) and entered before the dialog is even shown.
                path=MakeIfNE(TextDir.GetValue())
                if dialog.ShowModal()==wx.ID_OK:
                    path=dialog.GetPath()
                    chdir(path)
                    TextDir.SetValue(path)
                dialog.Destroy()
            def OnClickOK(event):
                """Run crawl batches until the user declines to continue."""
                def handle():
                    """Run one batch of at most NumLinks pages.

                    Loads the logs, processes pages, writes the logs back and
                    returns the number of links still queued in FoundedA.
                    """
                    def HandleLinks():
                        """Process CurrentURL and download all queued images.

                        CurrentURL, NewFoundedA and the LogFile objects are
                        variables of the enclosing handle() call.
                        """
                        def OnError(ErrorMess):
                            """Record the failure, refresh the bad counters and
                            show the message unless all messages are suppressed."""
                            self.control.handle(ErrorMess)
                            BadLinksDisp.SetLabel('%i' % (BadA.size(),))
                            BadImgsDisp.SetLabel('%i' % (BadImages.size(),))
                            if not self.SuppressAll.GetValue():
                                self.message(ErrorMess)
                        ImageFound,ImageURL=False,''
                        try:
                            # FoundedA.pop() yields '' when the page queue is
                            # empty; then only the pending images are handled.
                            if CurrentURL!='':
                                SetA,SetImages=GetURLsSets(self,CurrentURL,MainURL)
                                NewFoundedA.update(SetA)
                                NewFoundedA.difference_update(VisitedA.GetLog())
                                if CurrentURL in NewFoundedA:
                                    NewFoundedA.remove(CurrentURL)
                                SetImages.difference_update(SavedImages.GetLog())
                                ImagesLinks.join(SetImages)
                            while ImagesLinks.size()>0 and self.proceed:
                                ImageFound,ImageURL=True,ImagesLinks.pop()
                                # The running count of saved images prefixes
                                # the file name.
                                FileName='%i%s' % (SavedImages.size(),
                                                   FileNameFromURL(ImageURL))
                                status,body=GetBody(self,ImageURL,'img')
                                print '  URL: %s, ImageURL: %s, Status: %i' % \
                                      (CurrentURL,ImageURL,status)
                                if status==200:
                                    f=file(FileName,'wb')
                                    f.write(body)
                                    f.close()
                                    compare.write('%s %s\n' % \
                                                  (FileName,CurrentURL))
                                    SavedImages.add(ImageURL)
                                elif status in (301,302):
                                    # Queue the redirect target; the original
                                    # URL is marked as done.
                                    body=urljoin(ImageURL,body)
                                    if body not in SavedImages.GetLog():
                                        ImagesLinks.add(body)
                                    SavedImages.add(ImageURL)
                                else:
                                    BadImgsDisp.SetLabel('%i' % (BadImages.size(),))
                                ImagesDisp.SetLabel('%i' % (ImagesLinks.size(),))
                                SavedDisp.SetLabel('%i' % (SavedImages.size(),))
                                # Let the GUI process pending events (e.g. STOP).
                                app.Yield()
                            ImageURL=''
                            VisitedA.add(CurrentURL)
                            # Parsed as (ImageFound and JoinOnImage) or JoinProcess.
                            if ImageFound and JoinOnImage.GetValue() \
                               or JoinProcess.GetValue():
                                FoundedA.join(NewFoundedA)
                                FoundedA.join(ReservedA)
                            else:
                                ReservedA.join(NewFoundedA)
                        except BadStatusLine:
                            OnError('Bad Status Line')
                        except CannotSendRequest:
                            OnError('Cannot send request')
                        except IncompleteRead:
                            OnError('Incomplete Read')
                        except socket.error:
                            OnError('Socket error')
                        except IOError:
                            OnError('IOError')
                        except TypeError:
                            OnError('TypeError')
                        except AttributeError:
                            OnError('AttributeError')
                    # Snapshot the option fields for the module-level helpers.
                    self.ExtStr=ExtStr(TextExt.GetValue())
                    # Collected extensions are also excluded from page links.
                    self.ExeptTuple=ExeptTuple(
                        ','.join((TextExept.GetValue(),TextExt.GetValue())))
                    self.TextUA=TextUA.GetValue()
                    MakeIfNE(TextDir.GetValue())
                    MainURL=RemoveWWW(TextURL.GetValue())
                    FoundedA=LogFile('founded',(MainURL,))
                    # Chained assignment: the six logs are unpacked into names
                    # and also kept together as the tuple LogList.
                    (ReservedA,VisitedA,BadA,ImagesLinks,SavedImages,
                     BadImages)=LogList=tuple(map(lambda x:LogFile(x),
                        ('reserved','visited','badfounded',
                         'images','saved','badimages')))
                    LogList=(FoundedA,)+LogList
                    self.BadA,self.BadImages=BadA,BadImages
                    compare=open('compare.log','a')
                    if JoinStart.GetValue():
                        FoundedA.join(ReservedA)
                    FoundedA.rem(VisitedA,BadA)
                    ReservedA.rem(FoundedA,VisitedA,BadA)
                    SetImages,NewFoundedA=set(),set()
                    counter=NumLinks.GetValue()
                    for n in xrange(1,counter+1):
                        # Once stopped or out of work the remaining
                        # iterations do nothing.
                        if self.proceed and \
                           (FoundedA.size()>0 or ImagesLinks.size()>0):
                            NumLeft.SetLabel('%i' % (counter-n,))
                            CurrentURL=FoundedA.pop()
                            VisitedDisp.SetLabel('%i' % (VisitedA.size(),))
                            NumHandled.SetLabel('%i' % (n,))
                            HandleLinks()
                            FoundedDisp.SetLabel('%i' % (FoundedA.size(),))
                            Reserved.SetLabel('%i' % (ReservedA.size(),))
                            BadLinksDisp.SetLabel('%i' % (BadA.size(),))
                            BadImgsDisp.SetLabel('%i' % (BadImages.size(),))
                            app.Yield()
                    if JoinEnd.GetValue():
                        FoundedA.join(NewFoundedA,ReservedA)
                    else:
                        ReservedA.join(NewFoundedA)
                    FoundedDisp.SetLabel('%i' % (FoundedA.size(),))
                    Reserved.SetLabel('%i' % (ReservedA.size(),))
                    app.Yield()
                    # Persist every log (map is used only for its side effect).
                    map(lambda x:x.WriteLog(),LogList)
                    compare.close()
                    return FoundedA.size()
                def SwitchOnStart(enable):
                    """Enable or disable the option widgets and start buttons."""
                    map(lambda x:x.Enable(enable),CBL+\
                        (OK,SetDir,TextURL,TextDir,TextExt,TextExept,TextUA))
                SwitchOnStart(False)
                key=wx.ID_YES
                while key==wx.ID_YES:
                    self.proceed=True
                    map(lambda x:x.SetLabel('waiting'),STL)
                    # The prompt is skipped, and the next batch starts at
                    # once, only when links remain, STOP was not pressed and
                    # "Suppress all messages" is checked.
                    if not (handle()>0 and self.proceed \
                            and self.SuppressAll.GetValue()):
                        key=self.message('Download completed. Continue?',
                                         wx.YES_NO|wx.YES_DEFAULT)
                SwitchOnStart(True)
            def AddGrowableCols(grid,cols):
                """Call grid.AddGrowableCol for each entry of cols.

                An entry is either a column index (proportion 0 is used) or
                a tuple of arguments passed through unchanged.
                """
                map(lambda x:grid.AddGrowableCol(
                    *(x if type(x).__name__=='tuple' else (x,0))),cols)
            def stop(event):
                """Ask the running loops to finish (checked via self.proceed)."""
                self.proceed=False
            wx.Frame.__init__(self,None,-1,'Image Downloader 0.0.1')
            self.SetSizer(wx.BoxSizer(VER))
            panel=wx.Panel(self,-1)
            self.GetSizer().Add(panel,1,EXP)
            panel.SetSizer(wx.BoxSizer(VER))
            box=panel.GetSizer()
            box.Add(wx.StaticText(panel,-1,'Enter the URL below and '
                                  'select the directory to download '
                                  'in the next field'),0,CA,5)
            # Row 1: About button + URL field; row 2: directory button + field.
            grid=wx.FlexGridSizer(2,2,10,10)
            grid.AddGrowableCol(1)
            About=wx.Button(panel,wx.ID_ABOUT)
            About.Bind(wx.EVT_BUTTON,AboutDisplay)
            TextURL=wx.TextCtrl(panel,-1,'http://xxx.xxxxxxxxxxxxxxxxxx.xx/')
            SetDir=wx.Button(panel,wx.ID_OPEN)
            SetDir.Bind(wx.EVT_BUTTON,OnClickSetDir)
            TextDir=wx.TextCtrl(panel,-1,getcwd())
            grid.AddMany(((About,0,EXP),(TextURL,0,EXP),
                          (SetDir,0,EXP),(TextDir,0,EXP)))
            box.Add(grid,0,EA,5)
            # Check boxes: when newly found links are merged into the queue,
            # and which messages are suppressed.
            grid=wx.FlexGridSizer(2,3,10,10)
            AddGrowableCols(grid,(0,1,2))
            JoinStart=wx.CheckBox(panel,-1,'Join founded links on start')
            JoinOnImage=wx.CheckBox(panel,-1,'Join founded links if image found')
            JoinProcess=wx.CheckBox(panel,-1,'Join founded links in process')
            JoinEnd=wx.CheckBox(panel,-1,'Join founded links at end')
            CBL=(JoinStart,JoinOnImage,JoinProcess,JoinEnd)
            map(lambda x:x.SetValue(True),CBL)
            grid.AddMany(CBL)
            self.DecodeSuppress=wx.CheckBox(
                panel,-1,'Suppress decode error messages')
            grid.Add(self.DecodeSuppress)
            self.SuppressAll=wx.CheckBox(
                panel,-1,'Suppress all messages')
            grid.Add(self.SuppressAll)
            box.Add(grid,0,EA,5)
            # Extension lists: what to collect and which links not to follow.
            grid=wx.GridSizer(2,2,10,10)
            TextExt=wx.TextCtrl(panel,-1,'jpg, jpeg, gif, png, tif, tiff')
            TextExept=wx.TextCtrl(panel,-1,'mp3, mpeg, mp4, flv, avi, swf')
            grid.AddMany(
                ((wx.StaticText(panel,-1,'Collect files with extensions'),0,AC),
                 (wx.StaticText(panel,-1,'Do not go by links with extensions'),
                  0,AC),(TextExt,0,EXP),(TextExept,0,EXP)))
            box.Add(grid,0,EA,5)
            box.Add(wx.StaticText(panel,-1,'User agent identification '
                                  'string of browser'),0,CA,5)
            TextUA=wx.TextCtrl(panel,-1,'Mozilla/5.0 (X11; Linux x86_64; '
                               'rv:5.0), Gecko/20100101 Firefox/5.0')
            box.Add(TextUA,0,EA,5)
            # Nine live counters; STL keeps them together so that they can
            # all be reset to 'waiting' at once.
            grid=wx.FlexGridSizer(2,6,10,10)
            AddGrowableCols(grid,(1,3,5))
            (NumLeft,NumHandled,Reserved,FoundedDisp,VisitedDisp,BadLinksDisp,
             ImagesDisp,SavedDisp,BadImgsDisp)=STL=map(lambda x:wx.StaticText(
                 panel,-1,'waiting',style=AR),xrange(9))
            grid.AddMany(
                (wx.StaticText(panel,-1,'Left counter:'),(NumLeft,0,EXP),
                 wx.StaticText(panel,-1,'Founded or left links:'),
                 (FoundedDisp,0,EXP),
                 wx.StaticText(panel,-1,'Founded or left images:'),
                 (ImagesDisp,0,EXP),
                 wx.StaticText(panel,-1,'Handled count:'),(NumHandled,0,EXP),
                 wx.StaticText(panel,-1,'Visited links:'),(VisitedDisp,0,EXP),
                 wx.StaticText(panel,-1,'Saved images:'),(SavedDisp,0,EXP),
                 wx.StaticText(panel,-1,'Reserved links:'),(Reserved,0,EXP),
                 wx.StaticText(panel,-1,'Bad links:'),(BadLinksDisp,0,EXP),
                 wx.StaticText(panel,-1,'Bad images links:'),(BadImgsDisp,0,EXP)))
            box.Add(grid,0,EA,5)
            box.AddStretchSpacer()
            # Bottom row: batch size, OK and STOP.  ``box`` is rebound to a
            # new horizontal sizer from here on.
            box=wx.BoxSizer(HOR)
            box.Add(wx.StaticText(panel,-1,'Number of links to handle'),0,CVA,5)
            NumLinks=wx.SpinCtrl(panel,-1,'1000',min=1,max=1000000,initial=1000)
            box.Add(NumLinks,0,CVA,5)
            box.AddStretchSpacer()
            OK=wx.Button(panel,wx.ID_OK)
            OK.Bind(wx.EVT_BUTTON,OnClickOK)
            box.Add(OK,0,CVA,5)
            STOP=wx.Button(panel,wx.ID_STOP)
            STOP.Bind(wx.EVT_BUTTON,stop)
            box.Add(STOP,0,CVA,5)
            panel.GetSizer().Add(box,0,EXP)
            self.Fit()
            self.Show()
    class App(wx.App):
        """Application object that creates the main window when constructed."""
        def __init__(self):
            wx.App.__init__(self)
            # MainWin fits and shows itself in its own constructor.
            window=MainWin()
    # Create the application (its constructor builds the main window) and
    # enter the wx event loop.  The global name ``app`` is also used by the
    # crawl loop, which calls app.Yield() to keep the GUI responsive.
    app=App()
    app.MainLoop()
    
    The next version may be provided as an attached file. The program has been tested on several sites.
  • Here is a new version of ImageDownloader with multithreading support.
    from ConfigParser import RawConfigParser
    from chardet import detect
    from httplib import BadStatusLine,CannotSendRequest,HTTPConnection,IncompleteRead
    from os import access,chdir,getcwd,mkdir,F_OK,R_OK
    from os.path import splitext
    from Queue import Queue
    from re import findall,I
    from threading import Thread
    from urlparse import urljoin,urlparse,urlunparse
    import codecs,socket,wx
    
    def FileNameFromURL(URL):
        """Flatten a URL into a file-name fragment ('http://' and '/' become '-')."""
        return '-'.join(URL.replace('http://','-').split('/'))
    def AddGrowableCols(grid,cols):
        """Call ``grid.AddGrowableCol`` once per entry of ``cols``.

        An entry that is exactly a tuple is passed through as the argument
        list; anything else is treated as a column index with proportion 0.
        Returns the list of the individual call results.
        """
        results=[]
        for entry in cols:
            args=entry if type(entry).__name__=='tuple' else (entry,0)
            results.append(grid.AddGrowableCol(*args))
        return results
    def prepare(founded):
        """Resolve each found link against ``URL`` and drop '../' segments."""
        # NOTE(review): ``URL`` is looked up as a module-level name when this
        # runs; no such global is visible in this part of the file - confirm
        # that it is defined before this helper is called.
        return [urljoin(URL,link).replace('../','') for link in founded]
    def GetCharset(string):
        """Return the 'encoding' entry of chardet's detection result for ``string``."""
        guess=detect(string)
        return guess['encoding']
    
    class LogFile:
        """A set of strings bound to ``<name>.log``, one entry per line.

        The set starts empty; ``ReadLog()`` loads the file and ``WriteLog()``
        stores it, both using the module-level encoding ``CSD``.
        """

        def __init__(self,name):
            """Bind to ``name + '.log'`` without touching the file."""
            self.FileName=name+'.log'
            # Start empty so the set methods work even before ReadLog();
            # previously ``self.set`` did not exist until ReadLog() ran.
            self.set=set()

        def add(self,item):
            """Add a single entry."""
            self.set.add(item)

        def JoinLogs(self,*logs):
            """Return the union of the given logs (LogFile objects or iterables)."""
            merged=set()
            for log in logs:
                merged.update(log.set if hasattr(log,'set') else log)
            return merged

        def join(self,*AddLogs):
            """Add the entries of every given log."""
            self.set.update(self.JoinLogs(*AddLogs))

        def GetLog(self):
            """Return the underlying set (not a copy)."""
            return self.set

        def rem(self,*RemLogs):
            """Remove the entries of every given log."""
            self.set.difference_update(self.JoinLogs(*RemLogs))

        def size(self):
            """Return the number of entries."""
            return len(self.set)

        def pop(self,n):
            """Remove up to ``n`` arbitrary entries and return them as a tuple."""
            pool=[]
            while len(pool)<n and self.set:
                pool.append(self.set.pop())
            return tuple(pool)

        def WriteLog(self):
            """Overwrite the log file with the current entries."""
            # ``with`` closes the handle even when encoding or writing fails.
            with codecs.open(self.FileName,'w',CSD) as f:
                f.write('\n'.join(self.set))

        def ReadLog(self):
            """Load the entries from the log file, or reset to empty if unreadable."""
            if access(self.FileName,R_OK):
                with codecs.open(self.FileName,'r',CSD) as f:
                    self.set=set(line.strip('\n\r') for line in f)
                # A blank line is not an entry.
                self.set.discard('')
            else:
                self.set=set()
    
    class CfgFile:
        """Loads and stores the GUI option values in ``imagedownloader.cfg``.

        ``GUI`` must expose the option widgets as attributes named like the
        config keys.  Text fields are restored with ``ChangeValue``, check
        boxes and spin controls with ``SetValue``; every value is read back
        with ``GetValue``.
        """
        FileName='imagedownloader.cfg'
        section='GUI'

        def __init__(self,GUI):
            """Remember ``GUI`` and restore its widgets from the config file."""
            self.GUI=GUI
            # ReadCfg() does nothing when the file is not readable.
            self.ReadCfg()

        def ReadCfg(self):
            """Copy the stored values into the widgets, if the file is readable."""
            if not access(self.FileName,R_OK):
                return
            config=RawConfigParser()
            config.read(self.FileName)
            for name in ('TextURL','TextRE','TextExt','TextExept','TextUA'):
                getattr(self.GUI,name).ChangeValue(config.get(self.section,name))
            for name in ('JoinStart','JoinOnImage','JoinProcess',
                         'JoinEnd','DecodeSuppress','SuppressAll'):
                getattr(self.GUI,name).SetValue(config.getboolean(self.section,name))
            for name in ('NumLinks','NumThreads'):
                getattr(self.GUI,name).SetValue(config.getint(self.section,name))

        def WriteCfg(self):
            """Store the current widget values in the config file."""
            config=RawConfigParser()
            config.add_section(self.section)
            for name in ('TextURL','JoinStart','JoinOnImage','JoinProcess','JoinEnd','DecodeSuppress',
                         'SuppressAll','TextRE','TextExt','TextExept','TextUA','NumLinks','NumThreads'):
                config.set(self.section,name,getattr(self.GUI,name).GetValue())
            # ``with`` closes the handle even when writing fails.
            with open(self.FileName,'w') as f:
                config.write(f)
    
    def GetBody(GUI,URL):
        """Request ``URL`` and return ``(URL, status, body)``.

        ``body`` is the response body for 200, the ``Location`` header for
        301/302 and a ``'Status N returned'`` text otherwise.  An empty 200
        body is reported through ``GUI.control.HandleCommon`` and returned
        with status 0.  The listed httplib/socket/type errors are not
        raised: they are returned as ``(URL, 0, <error text>)``.

        The connection is closed on every path, including the error ones.
        """
        PartsURL=urlparse(URL)
        conn=None
        try:
            conn=HTTPConnection(PartsURL[1])
            conn.request('GET',urlunparse(('','')+PartsURL[2:]),headers={
                'Referer':urlunparse(PartsURL[:2]+('',)*4),'User-Agent':GUI.UserAgent})
            r=conn.getresponse()
            status=r.status
            if status==200:
                body=r.read()
                if len(body)==0:
                    status=0
                    GUI.control.HandleCommon('Zero body length',URL)
            elif status in (301,302):
                body=r.getheader('location')
            else:
                body='Status %s returned' % status
        except BadStatusLine:
            return (URL,0,'Bad Status Line')
        except CannotSendRequest:
            return (URL,0,'Cannot send request')
        except IncompleteRead:
            return (URL,0,'Incomplete Read')
        except socket.error:
            return (URL,0,'Socket error')
        except IOError:
            return (URL,0,'IOError')
        except TypeError:
            return (URL,0,'TypeError')
        except AttributeError:
            return (URL,0,'AttributeError')
        except UnicodeEncodeError:
            return (URL,0,'UnicodeEncodeError')
        finally:
            # Runs for the early returns above as well, so a failed request
            # no longer leaves its socket open.
            if conn is not None:
                conn.close()
        return (URL,status,body)
    
    def GetURLsSets(GUI,URL,body,MainURLs):
        """Extract ``(page_links, image_links)`` from an already fetched page.

        Page links are the anchors in ``body`` whose host ends with one of
        ``MainURLs`` and whose extension is not in ``GUI.ExeptTuple``; image
        links are whatever ``GUI.ExtStr`` matches.  Both are resolved
        against ``URL`` and returned as sets.
        """
        def fallback(link,ErrorType,label):
            # Second attempt: decode with the detected charset.  Returns a
            # one-item list on success and an empty list when it fails again.
            try:
                return [unicode(link,GetCharset(link))]
            except ErrorType as BadStr:
                print('  Bad link: %s' % (BadStr.object,))
                GUI.control.HandleUnicode(label,URL)
                return []

        def prepare(found):
            # Convert every match to unicode, then resolve it against URL.
            ReadyLinks=[]
            for link in found:
                try:
                    ReadyLinks.append(unicode(link))
                except UnicodeDecodeError:
                    ReadyLinks.extend(
                        fallback(link,UnicodeDecodeError,'UnicodeDecodeError.'))
                except UnicodeEncodeError:
                    ReadyLinks.extend(
                        fallback(link,UnicodeEncodeError,'UnicodeEncodeError.'))
            return [urljoin(URL,item.strip()).replace('../','')
                    for item in ReadyLinks]

        def IsSubDomain(Candidate):
            # True when the candidate's host ends with any of the main hosts.
            PartURL=urlparse(Candidate)[1]
            return any([PartURL.endswith(main) for main in MainURLs])

        anchors=findall('<\s*a[^>]*?(?<=href)\s*=\s*[\'|\"]\s*([^\'\"]*?)\s*[\'|\"]',body,I)
        PageLinks=set(link for link in prepare(anchors)
                      if IsSubDomain(link)
                      and splitext(link)[-1].lower() not in GUI.ExeptTuple)
        ImageLinks=set(prepare(findall(GUI.ExtStr,body,I)))
        return (PageLinks,ImageLinks)
    
    def MakeIfNE(path):
        """Make ``path`` the current directory, creating it if it does not exist.

        Missing parent directories are created as well, so a nested path
        typed into the directory field no longer fails with OSError.
        """
        from os import makedirs  # local import: the file-level list only has mkdir
        if not access(path,F_OK):
            makedirs(path)
        chdir(path)
    
    def request(GUI,response):
        """Thread body: fetch one URL from ``LinkQueue`` and append the result.

        GUI      -- passed through to GetBody.
        response -- shared list that receives the ``(URL,status,body)`` tuple.

        ``task_done()`` is called in ``finally``: if GetBody raised an
        unexpected exception the queue counter would otherwise never reach
        zero and ``LinkQueue.join()`` in GetResponses would block forever.
        """
        URL=LinkQueue.get()
        try:
            response.append(GetBody(GUI,URL))
        finally:
            LinkQueue.task_done()
    
    def GetResponses(GUI,URLs):
        """Download all ``URLs`` in parallel, one daemon thread per URL.

        Each URL is queued on ``LinkQueue`` and a thread running ``request``
        is started for it; the call returns once the queue reports every
        item done. Returns a tuple of ``(URL,status,body)`` results in
        completion order.
        """
        collected=[]
        for address in URLs:
            LinkQueue.put(address)
            worker=Thread(target=request,args=(GUI,collected))
            worker.daemon=True
            worker.start()
        # Wait until every queued URL has been marked done by its thread.
        LinkQueue.join()
        return tuple(collected)
    
    def HandleLinks(GUI,URLs,MainURLs,NewFoundedA,SetImages,compare):
        """Process one batch of pages, then download every pending image.

        GUI         -- main window (options, counters, ``proceed`` flag).
        URLs        -- tuple of page URLs to fetch; may be empty.
        MainURLs    -- host names used to keep only on-site links.
        NewFoundedA -- set updated in place with newly discovered page links.
        SetImages   -- ignored (the original overwrote it immediately); kept
                       so existing callers keep working.
        compare     -- open log file receiving 'saved-file page-URL' lines.

        Fixes over the previous version: ``CurrentURL`` is initialised, so
        saving images when ``URLs`` is empty no longer raises NameError; the
        per-page sets are reset for every response (the redirect branch used
        to assign a misspelled ``SetImage``); the image file is closed even
        if writing fails; the file name is truncated to 255 characters once
        and that same name is written to ``compare``.
        """
        ImageFound,CurrentURL=False,''
        if URLs!=():
            for CurrentURL,status,body in GetResponses(GUI,URLs):
                # Fresh sets per response so nothing leaks over from the previous page.
                SetA,PageImages=set(),set()
                if status==200:
                    SetA,PageImages=GetURLsSets(GUI,CurrentURL,body,MainURLs)
                    VisitedA.add(CurrentURL)
                elif status in (301,302):
                    # Follow the redirect target as a newly found link.
                    SetA=set((urljoin(CurrentURL,body),))
                    VisitedA.add(CurrentURL)
                else:
                    BadA.add(CurrentURL)
                    GUI.control.HandleCommon(body,CurrentURL)
                NewFoundedA.update(SetA)
                NewFoundedA.difference_update(VisitedA.GetLog()|BadA.GetLog()|set((CurrentURL,)))
                PageImages.difference_update(SavedImages.GetLog()|BadImages.GetLog())
                ImagesLinks.join(PageImages)
        while ImagesLinks.size()>0 and GUI.proceed:
            ImageFound=True
            for ImageURL,status,body in GetResponses(GUI,ImagesLinks.pop(GUI.NumThreads.GetValue())):
                if status==200:
                    # Counter prefix keeps names unique; truncate so the name stays usable.
                    FileName=('%i%s' % (SavedImages.size(),FileNameFromURL(ImageURL)))[:255]
                    with open(FileName,'wb') as f:
                        f.write(body)
                    compare.write('%s %s\n' % (FileName,CurrentURL))
                    SavedImages.add(ImageURL)
                elif status in (301,302):
                    # Mark the old address as handled and queue the redirect target.
                    SavedImages.add(ImageURL)
                    ImagesLinks.add(urljoin(ImageURL,body))
                    ImagesLinks.rem(SavedImages,BadImages)
                else:
                    BadImages.add(ImageURL)
                    GUI.BadImgsDisp.SetLabel('%i' % BadImages.size())
                    GUI.control.HandleCommon(body,ImageURL)
                GUI.ImagesDisp.SetLabel('%i' % ImagesLinks.size())
                GUI.SavedDisp.SetLabel('%i' % SavedImages.size())
                app.Yield()
        # New links go straight to the work list or are parked in the reserve.
        if (ImageFound and GUI.JoinOnImage.GetValue()) or GUI.JoinProcess.GetValue():
            FoundedA.join(NewFoundedA)
            FoundedA.join(ReservedA)
        else:
            ReservedA.join(NewFoundedA)
    
    def handle(GUI,MainURLs):
        """Run one crawl pass and save all logs.

        GUI      -- main window (options, counters, ``proceed`` flag).
        MainURLs -- host names passed on to HandleLinks.

        Pages are taken from ``FoundedA`` in batches of ``NumThreads``. At
        least one batch is now processed: plain integer division gave zero
        batches whenever 'Number of links' was smaller than 'Number of
        threads', so nothing was downloaded. ``compare.log`` is closed even
        when a batch raises.
        """
        compare=codecs.open('compare.log','a',CSD)
        try:
            if GUI.JoinStart.GetValue():
                FoundedA.join(ReservedA)
            # Drop everything that was already handled in earlier runs.
            FoundedA.rem(VisitedA,BadA)
            ReservedA.rem(FoundedA,VisitedA,BadA)
            ImagesLinks.rem(SavedImages,BadImages)
            NewFoundedA,SetImages=set(),set()
            NumThreads=GUI.NumThreads.GetValue()
            counter=max(1,GUI.NumLinks.GetValue()//NumThreads)
            for n in xrange(1,counter+1):
                # Nothing below can refill the lists once this is false, so stop early.
                if not (GUI.proceed and (FoundedA.size()>0 or ImagesLinks.size()>0)):
                    break
                GUI.NumLeft.SetLabel('%i' % ((counter-n)*NumThreads))
                GUI.VisitedDisp.SetLabel('%i' % VisitedA.size())
                GUI.NumHandled.SetLabel('%i' % (n*NumThreads))
                HandleLinks(GUI,FoundedA.pop(NumThreads),MainURLs,NewFoundedA,SetImages,compare)
                GUI.FoundedDisp.SetLabel('%i' % FoundedA.size())
                GUI.Reserved.SetLabel('%i' % ReservedA.size())
                GUI.BadLinksDisp.SetLabel('%i' % BadA.size())
                GUI.BadImgsDisp.SetLabel('%i' % BadImages.size())
                app.Yield()
            if GUI.JoinEnd.GetValue():
                FoundedA.join(NewFoundedA,ReservedA)
            else:
                ReservedA.join(NewFoundedA)
            GUI.FoundedDisp.SetLabel('%i' % FoundedA.size())
            GUI.Reserved.SetLabel('%i' % ReservedA.size())
            app.Yield()
            for log in LogList:
                log.WriteLog()
        finally:
            compare.close()
    
    def RemoveWWW(URL):
        """Return ``URL`` (stripped of surrounding whitespace) without a leading 'www.' host prefix.

        Only the literal prefix 'www.' (any letter case) is removed. The
        previous ``lstrip('Ww')`` removed every leading 'w' character, which
        mangled hosts such as 'wiki.example.org' into 'iki.example.org'.
        """
        PartsURL=urlparse(URL.strip())
        host=PartsURL[1]
        if host[:4].lower()=='www.':
            host=host[4:]
        return urlunparse((PartsURL[0],host)+PartsURL[2:])
    
    # Encoding used for every log file.
    CSD='utf_8'
    # One LogFile per persistent URL set; LogList keeps them together for bulk read/write.
    LogList=tuple(LogFile(name) for name in (
        'founded','reserved','visited','badfounded','images','saved','badimages'))
    FoundedA,ReservedA,VisitedA,BadA,ImagesLinks,SavedImages,BadImages=LogList
    # Queue that hands URLs to the download threads.
    LinkQueue=Queue()
    # Short aliases for the wx layout flags used throughout the window code.
    VER,HOR=wx.VERTICAL,wx.HORIZONTAL
    EXP,AC,AR=wx.EXPAND,wx.ALIGN_CENTER,wx.ALIGN_RIGHT
    EA=wx.EXPAND|wx.ALL
    CA=wx.ALIGN_CENTER|wx.ALL
    CVA=wx.ALIGN_CENTER_VERTICAL|wx.ALL
    
    class control:
        """Error reporter: prints every error and optionally shows it in a dialog."""

        def __init__(self,GUI):
            # GUI supplies the 'suppress' check boxes and the message() dialog.
            self.GUI=GUI

        def HandleCommon(self,ErrorMess,URL,suppress=False):
            """Print the error; show a dialog unless it is suppressed.

            The dialog is skipped when ``suppress`` is true or the
            'Suppress all messages' box is checked.
            """
            print '  Error: %s, URL: %s' % (ErrorMess,URL)
            if suppress or self.GUI.SuppressAll.GetValue():
                return
            self.GUI.message(ErrorMess)

        def HandleUnicode(self,ErrorMess,URL):
            """Report a decode error, honouring the 'Suppress decode error messages' box."""
            return self.HandleCommon(ErrorMess,URL,self.GUI.DecodeSuppress.GetValue())
    
    class MainWin(wx.Frame):
        """Main window: collects the crawl options and drives the download."""

        def __init__(self):
            """Build all widgets, load the saved configuration and show the frame."""
            wx.Frame.__init__(self,None,-1,'Image Downloader 0.0.2')
            self.SetSizer(wx.BoxSizer(VER))
            panel=wx.Panel(self,-1)
            self.GetSizer().Add(panel,1,EXP)
            panel.SetSizer(wx.BoxSizer(VER))
            box=panel.GetSizer()
            box.Add(wx.StaticText(panel,-1,'Enter the URL below and select the directory '
                                  'to download in the next field'),0,CA,5)
            # About button + URL field, Open button + directory field.
            grid=wx.FlexGridSizer(2,2,10,10)
            grid.AddGrowableCol(1)
            About=wx.Button(panel,wx.ID_ABOUT)
            About.Bind(wx.EVT_BUTTON,self.AboutDisplay)
            self.TextURL=wx.TextCtrl(panel,-1,'http://xxx.xxxxxxxx.xx/, http://yyy.yyyyyy.yy/')
            SetDir=wx.Button(panel,wx.ID_OPEN)
            SetDir.Bind(wx.EVT_BUTTON,self.OnClickSetDir)
            self.TextDir=wx.TextCtrl(panel,-1,getcwd())
            grid.AddMany(((About,0,EXP),(self.TextURL,0,EXP),(SetDir,0,EXP),(self.TextDir,0,EXP)))
            box.Add(grid,0,EA,5)
            # Check boxes: when found links are merged into the work list, and message suppression.
            grid=wx.FlexGridSizer(2,3,10,10)
            AddGrowableCols(grid,(0,1,2))
            self.JoinStart=wx.CheckBox(panel,-1,'Join founded links on start')
            self.JoinOnImage=wx.CheckBox(panel,-1,'Join founded links if image found')
            self.JoinProcess=wx.CheckBox(panel,-1,'Join founded links in process')
            self.JoinEnd=wx.CheckBox(panel,-1,'Join founded links at end')
            self.CBL=(self.JoinStart,self.JoinOnImage,self.JoinProcess,self.JoinEnd)
            map(lambda x:x.SetValue(True),self.CBL)
            self.DecodeSuppress=wx.CheckBox(panel,-1,'Suppress decode error messages')
            self.SuppressAll=wx.CheckBox(panel,-1,'Suppress all messages')
            grid.AddMany(self.CBL+(self.DecodeSuppress,self.SuppressAll))
            box.Add(grid,0,EA,5)
            # Image regex template (%s receives the extension alternatives) and extension lists.
            grid=wx.GridSizer(2,3,10,10)
            self.TextRE=wx.TextCtrl(panel,-1,'[\'|\"]([^\'\"]*?(?:%s))\s*[\'|\"]')
            self.TextExt=wx.TextCtrl(panel,-1,'jpg, jpeg, gif, png, tif, tiff')
            self.TextExept=wx.TextCtrl(panel,-1,'mp3, mpeg, mp4, flv, avi, swf')
            grid.AddMany(((wx.StaticText(panel,-1,'Regular expression'),0,AC),
                          (wx.StaticText(panel,-1,'Collect files with extensions'),0,AC),
                          (wx.StaticText(panel,-1,'Do not pass to extensions'),0,AC),
                          (self.TextRE,0,EXP),(self.TextExt,0,EXP),(self.TextExept,0,EXP)))
            box.Add(grid,0,EA,5)
            box.Add(wx.StaticText(panel,-1,'User agent identification string of browser'),0,CA,5)
            self.TextUA=wx.TextCtrl(panel,-1,'Mozilla/5.0 (X11; Linux x86_64; '
                                    'rv:6.0.1) Gecko/20100101 Firefox/6.0.1')
            box.Add(self.TextUA,0,EA,5)
            # Nine right-aligned counters; STL keeps them together so they can be reset at once.
            grid=wx.FlexGridSizer(2,6,10,10)
            AddGrowableCols(grid,(1,3,5))
            (self.NumLeft,self.NumHandled,self.Reserved,self.FoundedDisp,self.VisitedDisp,
             self.BadLinksDisp,self.ImagesDisp,self.SavedDisp,self.BadImgsDisp)=self.STL=map(
                 lambda x:wx.StaticText(panel,-1,'waiting',style=AR),xrange(9))
            grid.AddMany((wx.StaticText(panel,-1,'Left counter:'),(self.NumLeft,0,EXP),
                          wx.StaticText(panel,-1,'Founded or left links:'),(self.FoundedDisp,0,EXP),
                          wx.StaticText(panel,-1,'Founded or left images:'),(self.ImagesDisp,0,EXP),
                          wx.StaticText(panel,-1,'Handled count:'),(self.NumHandled,0,EXP),
                          wx.StaticText(panel,-1,'Visited links:'),(self.VisitedDisp,0,EXP),
                          wx.StaticText(panel,-1,'Saved images:'),(self.SavedDisp,0,EXP),
                          wx.StaticText(panel,-1,'Reserved links:'),(self.Reserved,0,EXP),
                          wx.StaticText(panel,-1,'Bad links:'),(self.BadLinksDisp,0,EXP),
                          wx.StaticText(panel,-1,'Bad images links:'),(self.BadImgsDisp,0,EXP)))
            box.Add(grid,0,EA,5)
            box.AddStretchSpacer()
            # Bottom row: link/thread spin controls and the OK / STOP buttons.
            box=wx.BoxSizer(HOR)
            self.NumLinks=wx.SpinCtrl(panel,-1,'1000',min=1,max=1000000,initial=1000)
            self.NumThreads=wx.SpinCtrl(panel,-1,'10',min=1,max=100,initial=10)
            box.AddMany(((wx.StaticText(panel,-1,'Number of links:'),0,CVA,5),(self.NumLinks,0,CVA,5),
                         (wx.StaticText(panel,-1,'Number of threads:'),0,CVA,5),(self.NumThreads,0,CVA,5)))
            box.AddStretchSpacer()
            OK=wx.Button(panel,wx.ID_OK)
            OK.Bind(wx.EVT_BUTTON,self.OnClickOK)
            box.Add(OK,0,CVA,5)
            # SEW: widgets disabled while a download is running (STOP stays enabled).
            self.SEW=(OK,SetDir,self.TextURL,self.TextDir,self.TextRE,self.TextExt,
                      self.TextExept,self.TextUA,self.NumLinks,self.NumThreads)
            STOP=wx.Button(panel,wx.ID_STOP)
            STOP.Bind(wx.EVT_BUTTON,self.stop)
            box.Add(STOP,0,CVA,5)
            panel.GetSizer().Add(box,0,EXP)
            self.config=CfgFile(self)
            self.control=control(self)
            self.Fit()
            self.Show()

        def stop(self,event):
            """STOP button handler: clear the flag polled by the download loops."""
            self.proceed=False

        def message(self,mess,style=wx.OK):
            """Show ``mess`` in a modal 'Information' dialog and return the button id."""
            dialog=wx.MessageDialog(self,mess,'Information',style)
            choice=dialog.ShowModal()
            dialog.Destroy()
            return choice

        def AboutDisplay(self,event):
            """About button handler: show the standard About box."""
            info=wx.AboutDialogInfo()
            info.AddDeveloper('Patrashov Alexey Sergeevich, Moscow, RF')
            info.SetCopyright('(C) All right reserved.')
            info.SetDescription('The utility for download of all images from the site. '
                                'Intuitive interface and the ability to modification '
                                'program by self. Program has many log files and options.')
            info.SetLicense('GNU GPL license.')
            info.SetName('Image Downloader')
            # Kept in step with the version shown in the window title.
            info.SetVersion('0.0.2')
            info.SetWebSite(('mailto:alex-wolf-75@mail.ru','alex-wolf-75@mail.ru'))
            wx.AboutBox(info)

        def OnClickSetDir(self,event):
            """Open button handler: choose the download directory and reload its config."""
            dialog=wx.DirDialog(self)
            MakeIfNE(self.TextDir.GetValue())
            if dialog.ShowModal()==wx.ID_OK:
                path=dialog.GetPath()
                chdir(path)
                self.TextDir.ChangeValue(path)
            dialog.Destroy()
            self.config.ReadCfg()

        def OnClickOK(self,event):
            """OK button handler: read the options and run crawl passes until told to stop.

            The input widgets are disabled for the duration of the run and are
            now re-enabled in ``finally``, so an exception raised by a pass no
            longer leaves the window permanently locked.
            """
            SwitchOnStart=lambda enable:map(lambda x:x.Enable(enable),self.CBL+self.SEW)
            StrToTuple=lambda text:tuple(map(lambda x:x.strip(),text.split(',')))
            # Fill the regex template with the escaped image extensions.
            self.ExtStr=self.TextRE.GetValue() % '|'.join(
                map(lambda x:'\.'+x,StrToTuple(self.TextExt.GetValue())))
            # Links ending in these extensions are never followed as pages.
            self.ExeptTuple=tuple(map(lambda x:'.'+x,StrToTuple(self.TextExept.GetValue())+\
                                      StrToTuple(self.TextExt.GetValue())))
            self.UserAgent=self.TextUA.GetValue()
            MakeIfNE(self.TextDir.GetValue())
            map(lambda x:x.ReadLog(),LogList)
            self.config.WriteCfg()
            SwitchOnStart(False)
            try:
                MainURLs=set(map(RemoveWWW,self.TextURL.GetValue().split(',')))
                FoundedA.join(MainURLs)
                key=wx.ID_YES
                while key==wx.ID_YES:
                    self.proceed=True
                    map(lambda x:x.SetLabel('waiting'),self.STL)
                    handle(self,set(map(lambda x:urlparse(x)[1],MainURLs)))
                    # Ask whether to continue unless work remains, STOP was not
                    # pressed and all messages are suppressed.
                    if not (((self.JoinStart.GetValue() or self.JoinOnImage.GetValue() \
                              or self.JoinProcess.GetValue() or self.JoinEnd.GetValue()) \
                             and ReservedA.size()>0 or FoundedA.size()>0) \
                            and self.proceed and self.SuppressAll.GetValue()):
                        key=self.message('Download completed. Continue?',wx.YES_NO|wx.YES_DEFAULT)
            finally:
                SwitchOnStart(True)
    
    class App(wx.App):
        """Application object that creates the main window on construction."""

        def __init__(self):
            wx.App.__init__(self)
            # The window shows itself in its own constructor.
            MainFrame=MainWin()
    
    # Create the application; handle() and HandleLinks() use this global for app.Yield().
    app=App()
    # Start processing GUI events.
    app.MainLoop()
    
  • Bug fix.
    from ConfigParser import RawConfigParser
    from chardet import detect
    from httplib import BadStatusLine,CannotSendRequest,HTTPConnection,IncompleteRead
    from os import access,chdir,getcwd,mkdir,F_OK,R_OK
    from os.path import splitext
    from Queue import Queue
    from re import findall,I
    from threading import Thread
    from urlparse import urljoin,urlparse,urlunparse
    import codecs,socket,wx
    
    def FileNameFromURL(URL):
        """Turn a URL into a flat file name: 'http://' and every '/' become '-'."""
        return URL.replace('http://','-').replace('/','-')

    def AddGrowableCols(grid,cols):
        """Call ``grid.AddGrowableCol`` for every entry of ``cols``.

        An entry is either a tuple of arguments or a bare column index, in
        which case ``0`` is passed as the second argument. Returns the list
        of call results.
        """
        results=[]
        for entry in cols:
            arguments=entry if type(entry).__name__=='tuple' else (entry,0)
            results.append(grid.AddGrowableCol(*arguments))
        return results

    def prepare(founded):
        """Resolve every link in ``founded`` against ``URL`` and drop '../' parts."""
        # NOTE(review): ``URL`` is not a parameter here and no module-level URL is
        # visible; GetURLsSets defines its own nested prepare -- confirm this one is used.
        return [urljoin(URL,link).replace('../','') for link in founded]

    def GetCharset(string):
        """Return the 'encoding' entry of chardet's detection result for ``string``."""
        return detect(string)['encoding']
    
    class LogFile:
        """A set of URLs that can be saved to and loaded from '<name>.log'.

        The set now exists (empty) right after construction, so ``add``,
        ``join``, ``size`` etc. no longer raise AttributeError when they are
        called before ``ReadLog``.
        """

        def __init__(self,name):
            self.FileName=name+'.log'
            self.set=set()

        def add(self,item):
            """Add one item to the set."""
            self.set.add(item)

        def JoinLogs(self,*logs):
            """Return the union of ``logs``; each may be a LogFile or any iterable."""
            union=set()
            for log in logs:
                union.update(log.set if hasattr(log,'set') else log)
            return union

        def join(self,*AddLogs):
            """Add every item of the given logs/iterables to this set."""
            self.set.update(self.JoinLogs(*AddLogs))

        def rem(self,*RemLogs):
            """Remove every item of the given logs/iterables from this set."""
            self.set.difference_update(self.JoinLogs(*RemLogs))

        def GetLog(self):
            """Return the underlying set (not a copy)."""
            return self.set

        def size(self):
            """Return the number of stored items."""
            return len(self.set)

        def pop(self,n):
            """Remove and return up to ``n`` arbitrary items as a tuple."""
            pool=[]
            while len(pool)<n and len(self.set)>0:
                pool.append(self.set.pop())
            return tuple(pool)

        def WriteLog(self):
            """Write the set to the log file, one item per line, encoded as CSD."""
            with codecs.open(self.FileName,'w',CSD) as f:
                f.write('\n'.join(self.set))

        def ReadLog(self):
            """Replace the set with the file's lines; a missing file gives an empty set."""
            if access(self.FileName,R_OK):
                with codecs.open(self.FileName,'r',CSD) as f:
                    lines=[x.strip('\n\r') for x in f.readlines()]
                # Blank lines would otherwise become an empty URL in the work list.
                self.set=set(x for x in lines if x)
            else:
                self.set=set()
    
    class CfgFile:
        """Stores the window's option values in 'imagedownloader.cfg' (section 'GUI')."""
        FileName='imagedownloader.cfg'
        section='GUI'

        def __init__(self,GUI):
            # GUI is the window whose widgets are read and written by name.
            self.GUI=GUI
            self.ReadCfg()  # does nothing when the file is not readable

        def _Load(self,config,names,reader,setter):
            """Copy the named options into the widgets, skipping unusable ones.

            An option that is missing from the file, or whose value cannot be
            converted (ValueError from getboolean/getint), leaves the widget
            at its current value instead of aborting the whole load.
            """
            for name in names:
                if not config.has_option(self.section,name):
                    continue
                try:
                    value=reader(self.section,name)
                except ValueError:
                    continue
                getattr(getattr(self.GUI,name),setter)(value)

        def ReadCfg(self):
            """Load the option values from the config file in the current directory."""
            if not access(self.FileName,R_OK):
                return
            config=RawConfigParser()
            config.read(self.FileName)
            self._Load(config,('TextURL','TextRE','TextExt','TextExept','TextUA'),
                       config.get,'ChangeValue')
            self._Load(config,('JoinStart','JoinOnImage','JoinProcess',
                               'JoinEnd','DecodeSuppress','SuppressAll'),
                       config.getboolean,'SetValue')
            self._Load(config,('NumLinks','NumThreads'),config.getint,'SetValue')

        def WriteCfg(self):
            """Write the current value of every option widget to the config file."""
            config=RawConfigParser()
            config.add_section(self.section)
            for name in ('TextURL','JoinStart','JoinOnImage','JoinProcess','JoinEnd','DecodeSuppress',
                         'SuppressAll','TextRE','TextExt','TextExept','TextUA','NumLinks','NumThreads'):
                config.set(self.section,name,getattr(self.GUI,name).GetValue())
            # 'with' closes the file even if writing fails.
            with open(self.FileName,'w') as f:
                config.write(f)
    
    def GetBody(GUI,URL):
        """Fetch ``URL`` with one HTTP GET (60 s socket timeout) and describe the outcome.

        GUI -- object providing ``UserAgent`` and ``control.HandleCommon``.
        URL -- absolute URL to request.

        Returns a tuple ``(URL,status,body)``:
          * status 200      -- ``body`` is the response payload;
          * status 301/302  -- ``body`` is the ``Location`` header value;
          * other statuses  -- ``body`` is a 'Status ... returned' message;
          * status 0        -- the request failed or the payload was empty,
                               ``body`` is a short error description.

        The previous helper thread class was never started (``run()`` was
        called directly), so the download was already sequential and its
        'Download is incompleted' branch could not be reached. The payload
        is now read in a plain loop; a stalled transfer is ended by the
        connection timeout and reported as 'Socket error'. The connection is
        closed in ``finally`` on every path.
        """
        # NOTE(review): this runs in worker threads (see request/GetResponses);
        # HandleCommon may open a dialog from there -- confirm that is intended.
        PartsURL=urlparse(URL)
        conn=None
        try:
            conn=HTTPConnection(PartsURL[1], timeout = 60)
            # Request only path/params/query/fragment; scheme+host go into Referer.
            conn.request('GET',urlunparse(('','')+PartsURL[2:]),headers={
                'Referer':urlunparse(PartsURL[:2]+('',)*4),'User-Agent':GUI.UserAgent})
            r=conn.getresponse()
            status=r.status
            if status==200:
                # Read in 1024-byte pieces and join once at the end.
                pieces=[]
                piece=r.read(1024)
                while len(piece)>0:
                    pieces.append(piece)
                    piece=r.read(1024)
                body=''.join(pieces)
                if len(body)==0:
                    # An empty 200 response is reported as a failure.
                    status=0
                    GUI.control.HandleCommon('Zero body length',URL)
            elif status in (301,302):
                body=r.getheader('location')
            else:
                body='Status %s returned' % status
        except BadStatusLine:
            return (URL,0,'Bad Status Line')
        except CannotSendRequest:
            return (URL,0,'Cannot send request')
        except IncompleteRead:
            return (URL,0,'Incomplete Read')
        except socket.error:
            return (URL,0,'Socket error')
        except IOError:
            return (URL,0,'IOError')
        except TypeError:
            return (URL,0,'TypeError')
        except AttributeError:
            return (URL,0,'AttributeError')
        except UnicodeEncodeError:
            return (URL,0,'UnicodeEncodeError')
        finally:
            if conn is not None:
                conn.close()
        return (URL,status,body)
    
    def GetURLsSets(GUI,URL,body,MainURLs):
        def prepare(found):
            ReadyLinks=[]
            for link in found:
                try:
                    ReadyLinks.append(unicode(link))
                except UnicodeDecodeError:
                    try:
                        ReadyLinks.append(unicode(link,GetCharset(link)))
                    except UnicodeDecodeError as BadStr:
                        print '  Bad link:',BadStr.object
                        GUI.control.HandleUnicode('UnicodeDecodeError.',URL)
                except UnicodeEncodeError:
                    try:
                        ReadyLinks.append(unicode(link,GetCharset(link)))
                    except UnicodeEncodeError as BadStr:
                        print '  Bad link:',BadStr.object
                        GUI.control.HandleUnicode('UnicodeEncodeError.',URL)
            return map(lambda x:urljoin(URL,x.strip()).replace('../',''),ReadyLinks)
    
        def IsSubDomain(URL):
            PartURL=urlparse(URL)[1]
            return any(map(lambda x:PartURL.endswith(x),MainURLs))
    
        return (set(filter(lambda x:IsSubDomain(x) and splitext(x)[-1].lower() not in GUI.ExeptTuple,
                prepare(findall('<\s*a[^>]*?(?<=href)\s*=\s*[\'|\"]\s*([^\'\"]*?)\s*[\'|\"]',body,I)))),
                set(prepare(findall(GUI.ExtStr,body,I))))
    
    def MakeIfNE(path):
        """Make ``path`` the current directory, creating it if it does not exist.

        Missing parent directories are created as well, so a nested path
        typed into the directory field no longer fails with OSError.
        """
        from os import makedirs  # local import: the file-level list only has mkdir
        if not access(path,F_OK):
            makedirs(path)
        chdir(path)
    
    def request(GUI,response):
        """Thread body: fetch one URL from ``LinkQueue`` and append the result.

        GUI      -- passed through to GetBody.
        response -- shared list that receives the ``(URL,status,body)`` tuple.

        ``task_done()`` is called in ``finally``: if GetBody raised an
        unexpected exception the queue counter would otherwise never reach
        zero and ``LinkQueue.join()`` in GetResponses would block forever.
        """
        URL=LinkQueue.get()
        try:
            response.append(GetBody(GUI,URL))
        finally:
            LinkQueue.task_done()
    
    def GetResponses(GUI,URLs):
        """Download all ``URLs`` in parallel, one daemon thread per URL.

        Each URL is queued on ``LinkQueue`` and a thread running ``request``
        is started for it; the call returns once the queue reports every
        item done. Returns a tuple of ``(URL,status,body)`` results in
        completion order.
        """
        collected=[]
        for address in URLs:
            LinkQueue.put(address)
            worker=Thread(target=request,args=(GUI,collected))
            worker.daemon=True
            worker.start()
        # Wait until every queued URL has been marked done by its thread.
        LinkQueue.join()
        return tuple(collected)
    
    def HandleLinks(GUI,URLs,MainURLs,NewFoundedA,SetImages,compare):
        """Process one batch of pages, then download every pending image.

        GUI         -- main window (options, counters, ``proceed`` flag).
        URLs        -- tuple of page URLs to fetch; may be empty.
        MainURLs    -- host names used to keep only on-site links.
        NewFoundedA -- set updated in place with newly discovered page links.
        SetImages   -- ignored (the original overwrote it immediately); kept
                       so existing callers keep working.
        compare     -- open log file receiving 'saved-file page-URL' lines.

        Fixes over the previous version: ``CurrentURL`` is initialised, so
        saving images when ``URLs`` is empty no longer raises NameError; the
        per-page sets are reset for every response (the redirect branch used
        to assign a misspelled ``SetImage``); the image file is closed even
        if writing fails; the truncated file name that is actually used on
        disk is the one written to ``compare``.
        """
        ImageFound,CurrentURL=False,''
        if URLs!=():
            for CurrentURL,status,body in GetResponses(GUI,URLs):
                # Fresh sets per response so nothing leaks over from the previous page.
                SetA,PageImages=set(),set()
                if status==200:
                    SetA,PageImages=GetURLsSets(GUI,CurrentURL,body,MainURLs)
                    VisitedA.add(CurrentURL)
                elif status in (301,302):
                    # Follow the redirect target as a newly found link.
                    SetA=set((urljoin(CurrentURL,body),))
                    VisitedA.add(CurrentURL)
                else:
                    BadA.add(CurrentURL)
                    GUI.control.HandleCommon(body,CurrentURL)
                NewFoundedA.update(SetA)
                NewFoundedA.difference_update(VisitedA.GetLog()|BadA.GetLog()|set((CurrentURL,)))
                PageImages.difference_update(SavedImages.GetLog()|BadImages.GetLog())
                ImagesLinks.join(PageImages)
        while ImagesLinks.size()>0 and GUI.proceed:
            ImageFound=True
            for ImageURL,status,body in GetResponses(GUI,ImagesLinks.pop(GUI.NumThreads.GetValue())):
                if status==200:
                    # Counter prefix keeps names unique; truncate once so the
                    # file on disk and the compare.log entry carry the same name.
                    FileName=('%i%s' % (SavedImages.size(),FileNameFromURL(ImageURL)))[:255]
                    with open(FileName,'wb') as f:
                        f.write(body)
                    compare.write('%s %s\n' % (FileName,CurrentURL))
                    SavedImages.add(ImageURL)
                elif status in (301,302):
                    # Mark the old address as handled and queue the redirect target.
                    SavedImages.add(ImageURL)
                    ImagesLinks.add(urljoin(ImageURL,body))
                    ImagesLinks.rem(SavedImages,BadImages)
                else:
                    BadImages.add(ImageURL)
                    GUI.BadImgsDisp.SetLabel('%i' % BadImages.size())
                    GUI.control.HandleCommon(body,ImageURL)
                GUI.ImagesDisp.SetLabel('%i' % ImagesLinks.size())
                GUI.SavedDisp.SetLabel('%i' % SavedImages.size())
                app.Yield()
        # New links go straight to the work list or are parked in the reserve.
        if (ImageFound and GUI.JoinOnImage.GetValue()) or GUI.JoinProcess.GetValue():
            FoundedA.join(NewFoundedA)
            FoundedA.join(ReservedA)
        else:
            ReservedA.join(NewFoundedA)
    
    def handle(GUI,MainURLs):
        """Run one crawl pass and save all logs.

        GUI      -- main window (options, counters, ``proceed`` flag).
        MainURLs -- host names passed on to HandleLinks.

        Pages are taken from ``FoundedA`` in batches of ``NumThreads``. At
        least one batch is now processed: plain integer division gave zero
        batches whenever 'Number of links' was smaller than 'Number of
        threads', so nothing was downloaded. ``compare.log`` is closed even
        when a batch raises.
        """
        compare=codecs.open('compare.log','a',CSD)
        try:
            if GUI.JoinStart.GetValue():
                FoundedA.join(ReservedA)
            # Drop everything that was already handled in earlier runs.
            FoundedA.rem(VisitedA,BadA)
            ReservedA.rem(FoundedA,VisitedA,BadA)
            ImagesLinks.rem(SavedImages,BadImages)
            NewFoundedA,SetImages=set(),set()
            NumThreads=GUI.NumThreads.GetValue()
            counter=max(1,GUI.NumLinks.GetValue()//NumThreads)
            for n in xrange(1,counter+1):
                # Nothing below can refill the lists once this is false, so stop early.
                if not (GUI.proceed and (FoundedA.size()>0 or ImagesLinks.size()>0)):
                    break
                GUI.NumLeft.SetLabel('%i' % ((counter-n)*NumThreads))
                GUI.VisitedDisp.SetLabel('%i' % VisitedA.size())
                GUI.NumHandled.SetLabel('%i' % (n*NumThreads))
                HandleLinks(GUI,FoundedA.pop(NumThreads),MainURLs,NewFoundedA,SetImages,compare)
                GUI.FoundedDisp.SetLabel('%i' % FoundedA.size())
                GUI.Reserved.SetLabel('%i' % ReservedA.size())
                GUI.BadLinksDisp.SetLabel('%i' % BadA.size())
                GUI.BadImgsDisp.SetLabel('%i' % BadImages.size())
                app.Yield()
            if GUI.JoinEnd.GetValue():
                FoundedA.join(NewFoundedA,ReservedA)
            else:
                ReservedA.join(NewFoundedA)
            GUI.FoundedDisp.SetLabel('%i' % FoundedA.size())
            GUI.Reserved.SetLabel('%i' % ReservedA.size())
            app.Yield()
            for log in LogList:
                log.WriteLog()
        finally:
            compare.close()
    
    ##def RemoveWWW(URL):
    ##    PartsURL=urlparse(URL.strip())
    ##    return urlunparse((PartsURL[0],PartsURL[1].lstrip('Ww').lstrip('.'))+PartsURL[2:])
    
    # Encoding used for every log file.
    CSD='utf_8'
    # One LogFile per persistent URL set; LogList keeps them together for bulk read/write.
    LogList=tuple(LogFile(name) for name in (
        'founded','reserved','visited','badfounded','images','saved','badimages'))
    FoundedA,ReservedA,VisitedA,BadA,ImagesLinks,SavedImages,BadImages=LogList
    # Queue that hands URLs to the download threads.
    LinkQueue=Queue()
    # Short aliases for the wx layout flags used throughout the window code.
    VER,HOR=wx.VERTICAL,wx.HORIZONTAL
    EXP,AC,AR=wx.EXPAND,wx.ALIGN_CENTER,wx.ALIGN_RIGHT
    EA=wx.EXPAND|wx.ALL
    CA=wx.ALIGN_CENTER|wx.ALL
    CVA=wx.ALIGN_CENTER_VERTICAL|wx.ALL
    
    class control:
        """Error reporter: prints every error and optionally shows it in a dialog."""

        def __init__(self,GUI):
            # GUI supplies the 'suppress' check boxes and the message() dialog.
            self.GUI=GUI

        def HandleCommon(self,ErrorMess,URL,suppress=False):
            """Print the error; show a dialog unless it is suppressed.

            The dialog is skipped when ``suppress`` is true or the
            'Suppress all messages' box is checked.
            """
            print '  Error: %s, URL: %s' % (ErrorMess,URL)
            if suppress or self.GUI.SuppressAll.GetValue():
                return
            self.GUI.message(ErrorMess)

        def HandleUnicode(self,ErrorMess,URL):
            """Report a decode error, honouring the 'Suppress decode error messages' box."""
            return self.HandleCommon(ErrorMess,URL,self.GUI.DecodeSuppress.GetValue())
    
    class MainWin(wx.Frame):
        def __init__(self):
            """Build all widgets, load the saved configuration and show the frame."""
            wx.Frame.__init__(self,None,-1,'Image Downloader 0.0.2')
            self.SetSizer(wx.BoxSizer(VER))
            panel=wx.Panel(self,-1)
            self.GetSizer().Add(panel,1,EXP)
            panel.SetSizer(wx.BoxSizer(VER))
            box=panel.GetSizer()
            box.Add(wx.StaticText(panel,-1,'Enter the URL below and select the directory '
                                  'to download in the next field'),0,CA,5)
            # About button + URL field, Open button + directory field.
            grid=wx.FlexGridSizer(2,2,10,10)
            grid.AddGrowableCol(1)
            About=wx.Button(panel,wx.ID_ABOUT)
            About.Bind(wx.EVT_BUTTON,self.AboutDisplay)
            self.TextURL=wx.TextCtrl(panel,-1,'http://xxx.xxxxxxxx.xx/, http://yyy.yyyyyy.yy/')
            SetDir=wx.Button(panel,wx.ID_OPEN)
            SetDir.Bind(wx.EVT_BUTTON,self.OnClickSetDir)
            self.TextDir=wx.TextCtrl(panel,-1,getcwd())
            grid.AddMany(((About,0,EXP),(self.TextURL,0,EXP),(SetDir,0,EXP),(self.TextDir,0,EXP)))
            box.Add(grid,0,EA,5)
            # Check boxes: when found links are merged into the work list, and message suppression.
            grid=wx.FlexGridSizer(2,3,10,10)
            AddGrowableCols(grid,(0,1,2))
            self.JoinStart=wx.CheckBox(panel,-1,'Join founded links on start')
            self.JoinOnImage=wx.CheckBox(panel,-1,'Join founded links if image found')
            self.JoinProcess=wx.CheckBox(panel,-1,'Join founded links in process')
            self.JoinEnd=wx.CheckBox(panel,-1,'Join founded links at end')
            # CBL: the four 'join' check boxes, all ticked by default.
            self.CBL=(self.JoinStart,self.JoinOnImage,self.JoinProcess,self.JoinEnd)
            map(lambda x:x.SetValue(True),self.CBL)
            self.DecodeSuppress=wx.CheckBox(panel,-1,'Suppress decode error messages')
            self.SuppressAll=wx.CheckBox(panel,-1,'Suppress all messages')
            grid.AddMany(self.CBL+(self.DecodeSuppress,self.SuppressAll))
            box.Add(grid,0,EA,5)
            # Image regex template (%s receives the extension alternatives) and extension lists.
            grid=wx.GridSizer(2,3,10,10)
            self.TextRE=wx.TextCtrl(panel,-1,'[\'|\"]([^\'\"]*?(?:%s))\s*[\'|\"]')
            self.TextExt=wx.TextCtrl(panel,-1,'jpg, jpeg, gif, png, tif, tiff')
            self.TextExept=wx.TextCtrl(panel,-1,'mp3, mpeg, mp4, flv, avi, swf')
            grid.AddMany(((wx.StaticText(panel,-1,'Regular expression'),0,AC),
                          (wx.StaticText(panel,-1,'Collect files with extensions'),0,AC),
                          (wx.StaticText(panel,-1,'Do not pass to extensions'),0,AC),
                          (self.TextRE,0,EXP),(self.TextExt,0,EXP),(self.TextExept,0,EXP)))
            box.Add(grid,0,EA,5)
            box.Add(wx.StaticText(panel,-1,'User agent identification string of browser'),0,CA,5)
            self.TextUA=wx.TextCtrl(panel,-1,'Mozilla/5.0 (X11; Linux x86_64; '
                                    'rv:6.0.1) Gecko/20100101 Firefox/6.0.1')
            box.Add(self.TextUA,0,EA,5)
            # Nine right-aligned counters; STL keeps them together so they can be reset at once.
            grid=wx.FlexGridSizer(2,6,10,10)
            AddGrowableCols(grid,(1,3,5))
            (self.NumLeft,self.NumHandled,self.Reserved,self.FoundedDisp,self.VisitedDisp,
             self.BadLinksDisp,self.ImagesDisp,self.SavedDisp,self.BadImgsDisp)=self.STL=map(
                 lambda x:wx.StaticText(panel,-1,'waiting',style=AR),xrange(9))
            grid.AddMany((wx.StaticText(panel,-1,'Left counter:'),(self.NumLeft,0,EXP),
                          wx.StaticText(panel,-1,'Founded or left links:'),(self.FoundedDisp,0,EXP),
                          wx.StaticText(panel,-1,'Founded or left images:'),(self.ImagesDisp,0,EXP),
                          wx.StaticText(panel,-1,'Handled count:'),(self.NumHandled,0,EXP),
                          wx.StaticText(panel,-1,'Visited links:'),(self.VisitedDisp,0,EXP),
                          wx.StaticText(panel,-1,'Saved images:'),(self.SavedDisp,0,EXP),
                          wx.StaticText(panel,-1,'Reserved links:'),(self.Reserved,0,EXP),
                          wx.StaticText(panel,-1,'Bad links:'),(self.BadLinksDisp,0,EXP),
                          wx.StaticText(panel,-1,'Bad images links:'),(self.BadImgsDisp,0,EXP)))
            box.Add(grid,0,EA,5)
            box.AddStretchSpacer()
            # Bottom row: link/thread spin controls and the OK / STOP buttons.
            box=wx.BoxSizer(HOR)
            self.NumLinks=wx.SpinCtrl(panel,-1,'1000',min=1,max=1000000,initial=1000)
            self.NumThreads=wx.SpinCtrl(panel,-1,'1',min=1,max=100,initial=1)
            box.AddMany(((wx.StaticText(panel,-1,'Number of links:'),0,CVA,5),(self.NumLinks,0,CVA,5),
                         (wx.StaticText(panel,-1,'Number of threads:'),0,CVA,5),(self.NumThreads,0,CVA,5)))
            box.AddStretchSpacer()
            OK=wx.Button(panel,wx.ID_OK)
            OK.Bind(wx.EVT_BUTTON,self.OnClickOK)
            box.Add(OK,0,CVA,5)
            # SEW: widgets that OnClickOK disables during a run (STOP is not among them).
            self.SEW=(OK,SetDir,self.TextURL,self.TextDir,self.TextRE,self.TextExt,
                      self.TextExept,self.TextUA,self.NumLinks,self.NumThreads)
            STOP=wx.Button(panel,wx.ID_STOP)
            STOP.Bind(wx.EVT_BUTTON,self.stop)
            box.Add(STOP,0,CVA,5)
            panel.GetSizer().Add(box,0,EXP)
            # CfgFile reads any saved configuration into the widgets created above.
            self.config=CfgFile(self)
            self.control=control(self)
            self.Fit()
            self.Show()
    
        def stop(self,event):
            """STOP button handler: clear the flag polled by handle() and HandleLinks()."""
            self.proceed=False
    
        def message(self,mess,style=wx.OK):
            """Show ``mess`` in a modal 'Information' dialog and return the button id."""
            box=wx.MessageDialog(self,mess,'Information',style)
            answer=box.ShowModal()
            box.Destroy()
            return answer
    
        def AboutDisplay(self,event):
            """About button handler: show the standard About box.

            The reported version now matches the one in the window title
            ('Image Downloader 0.0.2'); it previously still said 0.0.1.
            """
            info=wx.AboutDialogInfo()
            info.AddDeveloper('Patrashov Alexey Sergeevich, Moscow, RF')
            info.SetCopyright('(C) All right reserved.')
            info.SetDescription('The utility for download of all images from the site. '
                                'Intuitive interface and the ability to modification '
                                'program by self. Program has many log files and options.')
            info.SetLicense('GNU GPL license.')
            info.SetName('Image Downloader')
            info.SetVersion('0.0.2')
            info.SetWebSite(('mailto:alex-wolf-75@mail.ru','alex-wolf-75@mail.ru'))
            wx.AboutBox(info)
    
        def OnClickSetDir(self, event):
            """Let the user choose the working directory with a directory dialog.

            MakeIfNE is called on the current value of the directory field
            before the dialog is shown (presumably creating it when missing --
            confirm against MakeIfNE). When the dialog is confirmed, the process
            changes into the chosen path and the directory field is updated.
            The configuration is re-read whether or not a path was chosen.
            ``event`` is the wx button event; it is not used.
            """
            chooser = wx.DirDialog(self)
            MakeIfNE(self.TextDir.GetValue())
            confirmed = chooser.ShowModal() == wx.ID_OK
            if confirmed:
                chosen = chooser.GetPath()
                chdir(chosen)
                self.TextDir.ChangeValue(chosen)
            chooser.Destroy()
            self.config.ReadCfg()
    
        def OnClickOK(self, event):
            """Start a download run using the values currently in the form.

            Steps, in order:
              1. Build ``self.ExtStr`` by substituting an alternation of the
                 escaped-dot extensions into the pattern from ``TextRE``, and
                 ``self.ExeptTuple`` from the excluded plus wanted extensions,
                 each prefixed with a dot.
              2. Store the user agent, call MakeIfNE on the directory field,
                 re-read every log in ``LogList`` and write the configuration.
              3. Disable the controls in ``self.CBL`` and ``self.SEW``.
              4. Add the entered URLs to ``FoundedA`` and call ``handle`` in a
                 loop until the user declines to continue.
              5. Re-enable the controls, also when an exception escapes, so a
                 failure does not leave the window permanently disabled.

            ``event`` is the wx button event; it is not used.
            """
            def switch_on_start(enable):
                # Enable or disable every control listed in CBL and SEW.
                for widget in self.CBL + self.SEW:
                    widget.Enable(enable)

            def str_to_tuple(text):
                # Split a comma-separated string into a tuple of stripped items.
                return tuple(item.strip() for item in text.split(','))

            extensions = str_to_tuple(self.TextExt.GetValue())
            excluded = str_to_tuple(self.TextExept.GetValue())
            # The pattern text from TextRE must contain one '%s'-style
            # placeholder that receives the '\.ext1|\.ext2|...' alternation.
            self.ExtStr = self.TextRE.GetValue() % '|'.join(
                r'\.' + ext for ext in extensions)
            self.ExeptTuple = tuple('.' + ext for ext in excluded + extensions)
            self.UserAgent = self.TextUA.GetValue()
            MakeIfNE(self.TextDir.GetValue())
            for log in LogList:
                log.ReadLog()
            self.config.WriteCfg()
            switch_on_start(False)
            try:
                # URLs are used as typed (only stripped); an earlier variant
                # normalized them through RemoveWWW instead.
                MainURLs = set(url.strip()
                               for url in self.TextURL.GetValue().split(','))
                FoundedA.join(MainURLs)
                key = wx.ID_YES
                while key == wx.ID_YES:
                    self.proceed = True
                    for label in self.STL:
                        label.SetLabel('waiting')
                    # A fresh set of network locations is passed on each pass.
                    handle(self, set(urlparse(url)[1] for url in MainURLs))
                    join_enabled = (self.JoinStart.GetValue()
                                    or self.JoinOnImage.GetValue()
                                    or self.JoinProcess.GetValue()
                                    or self.JoinEnd.GetValue())
                    work_left = ((join_enabled and ReservedA.size() > 0)
                                 or FoundedA.size() > 0)
                    # Loop again without asking only when links remain, STOP
                    # was not pressed and the SuppressAll box is ticked;
                    # otherwise let the user decide.
                    auto_continue = (work_left and self.proceed
                                     and self.SuppressAll.GetValue())
                    if not auto_continue:
                        key = self.message('Download completed. Continue?',
                                           wx.YES_NO | wx.YES_DEFAULT)
            finally:
                switch_on_start(True)
    
    class App(wx.App):
        """wx application object that creates the main window on construction."""

        def __init__(self):
            """Initialise the wx application, then instantiate MainWin."""
            wx.App.__init__(self)
            # The reference is only held locally; nothing else in this
            # constructor uses it.
            main_window = MainWin()
    
    # Script entry point: build the application object (its constructor
    # instantiates MainWin) and hand control to the wx event loop.
    app=App()
    app.MainLoop()
    
Sign In or Register to comment.