From b1a37f16f6ec0b352154f6a4798c51ccf4bac23b Mon Sep 17 00:00:00 2001 From: liran319 Date: Thu, 10 Apr 2014 23:16:40 +0800 Subject: [PATCH] Design a downloading tool --- .gitattributes | 22 +++++ .gitignore | 215 +++++++++++++++++++++++++++++++++++++++++++++++ ImageDownload.py | 52 ++++++++++++ README.md | 11 +++ 4 files changed, 300 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 ImageDownload.py create mode 100644 README.md diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..412eeda --- /dev/null +++ b/.gitattributes @@ -0,0 +1,22 @@ +# Auto detect text files and perform LF normalization +* text=auto + +# Custom for Visual Studio +*.cs diff=csharp +*.sln merge=union +*.csproj merge=union +*.vbproj merge=union +*.fsproj merge=union +*.dbproj merge=union + +# Standard to msysgit +*.doc diff=astextplain +*.DOC diff=astextplain +*.docx diff=astextplain +*.DOCX diff=astextplain +*.dot diff=astextplain +*.DOT diff=astextplain +*.pdf diff=astextplain +*.PDF diff=astextplain +*.rtf diff=astextplain +*.RTF diff=astextplain diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b9d6bd9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,215 @@ +################# +## Eclipse +################# + +*.pydevproject +.project +.metadata +bin/ +tmp/ +*.tmp +*.bak +*.swp +*~.nib +local.properties +.classpath +.settings/ +.loadpath + +# External tool builders +.externalToolBuilders/ + +# Locally stored "Eclipse launch configurations" +*.launch + +# CDT-specific +.cproject + +# PDT-specific +.buildpath + + +################# +## Visual Studio +################# + +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+ +# User-specific files +*.suo +*.user +*.sln.docstates + +# Build results + +[Dd]ebug/ +[Rr]elease/ +x64/ +build/ +[Bb]in/ +[Oo]bj/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +*_i.c +*_p.c +*.ilk +*.meta +*.obj +*.pch +*.pdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.log +*.scc + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opensdf +*.sdf +*.cachefile + +# Visual Studio profiler +*.psess +*.vsp +*.vspx + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# NCrunch +*.ncrunch* +.*crunch*.local.xml + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.Publish.xml +*.pubxml + +# NuGet Packages Directory +## TODO: If you have NuGet Package Restore enabled, uncomment the next line +#packages/ + +# Windows Azure Build Output +csx +*.build.csdef + +# Windows Store app package directory +AppPackages/ + +# Others +sql/ +*.Cache +ClientBin/ +[Ss]tyle[Cc]op.* +~$* +*~ +*.dbmdl +*.[Pp]ublish.xml +*.pfx +*.publishsettings + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file to a newer +# Visual Studio version. 
Backup files are not needed, because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm + +# SQL Server files +App_Data/*.mdf +App_Data/*.ldf + +############# +## Windows detritus +############# + +# Windows image file caches +Thumbs.db +ehthumbs.db + +# Folder config file +Desktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Mac crap +.DS_Store + + +############# +## Python +############# + +*.py[co] + +# Packages +*.egg +*.egg-info +dist/ +build/ +eggs/ +parts/ +var/ +sdist/ +develop-eggs/ +.installed.cfg + +# Installer logs +pip-log.txt + +# Unit test / coverage reports +.coverage +.tox + +#Translations +*.mo + +#Mr Developer +.mr.developer.cfg diff --git a/ImageDownload.py b/ImageDownload.py new file mode 100644 index 0000000..6af62b6 --- /dev/null +++ b/ImageDownload.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- + +import os +import sys +import re +import urllib + +url_addr = sys.argv[1] +save_path = sys.argv[2] +# url_addr = r"http://www.douban.com/" +# save_path = r"C:\Users\naril\Desktop\new" +html = urllib.urlopen(url_addr).read() # module-level: the page is fetched as soon as the file is executed + + +class Get_Picture(object): + def __init__(self, html_content): + self.html_content = html_content + + def get_image_list(self): + """Collect every re.findall match of the pattern in self.html_content, + keep the list on self.image_list and return it.""" + image_list = [] + # print self.html_content + pattern = r'?' # meant to match image URLs; NOTE(review): the literal here is only '?', which re rejects (nothing to repeat) - presumably the real pattern was lost from this copy, confirm against the repository + match_result = re.findall(pattern, self.html_content) + # print match_result + for i in match_result: + # url = url_addr + i # generate the absolute address of the picture + image_list.append(i) + # print image_list + self.image_list = image_list + return self.image_list + + def downloader(self): + """Download every URL in self.image_list into save_path, named after the last '/' segment of the URL; a failed download is skipped silently.""" + for each_url in self.image_list: + filename = each_url.split("/")[-1] + savedfile = os.path.join(save_path, filename) + try: + urllib.urlretrieve(each_url, savedfile) + except: + pass + print "Downloading completed!" 
+ + +def run(): + a = Get_Picture(html) + a.get_image_list() + a.downloader() + +if __name__ == '__main__': + run() diff --git a/README.md b/README.md new file mode 100644 index 0000000..3216d84 --- /dev/null +++ b/README.md @@ -0,0 +1,11 @@ +Designed by Ran. + +Description: +Download all the pictures from a website. +1. Keep the file names the pictures have on the website. +2. Then print their size. +3. Download all the images and save them to one directory. + +Usage: +Run this program in cmd or Linux bash: + "python ImageDownload.py <url_addr> <save_path>" \ No newline at end of file