Skip to content

Commit

Permalink
Design a downloading tool
Browse files Browse the repository at this point in the history
  • Loading branch information
ranlix committed Apr 10, 2014
0 parents commit b1a37f1
Show file tree
Hide file tree
Showing 4 changed files with 300 additions and 0 deletions.
22 changes: 22 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Auto detect text files and perform LF normalization
* text=auto

# Custom for Visual Studio
*.cs diff=csharp
*.sln merge=union
*.csproj merge=union
*.vbproj merge=union
*.fsproj merge=union
*.dbproj merge=union

# Standard to msysgit
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain
215 changes: 215 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
#################
## Eclipse
#################

*.pydevproject
.project
.metadata
bin/
tmp/
*.tmp
*.bak
*.swp
*~.nib
local.properties
.classpath
.settings/
.loadpath

# External tool builders
.externalToolBuilders/

# Locally stored "Eclipse launch configurations"
*.launch

# CDT-specific
.cproject

# PDT-specific
.buildpath


#################
## Visual Studio
#################

## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.

# User-specific files
*.suo
*.user
*.sln.docstates

# Build results

[Dd]ebug/
[Rr]elease/
x64/
build/
[Bb]in/
[Oo]bj/

# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*

*_i.c
*_p.c
*.ilk
*.meta
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.log
*.scc

# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opensdf
*.sdf
*.cachefile

# Visual Studio profiler
*.psess
*.vsp
*.vspx

# Guidance Automation Toolkit
*.gpState

# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper

# TeamCity is a build add-in
_TeamCity*

# DotCover is a Code Coverage Tool
*.dotCover

# NCrunch
*.ncrunch*
.*crunch*.local.xml

# Installshield output folder
[Ee]xpress/

# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html

# Click-Once directory
publish/

# Publish Web Output
*.Publish.xml
*.pubxml

# NuGet Packages Directory
## TODO: If you have NuGet Package Restore enabled, uncomment the next line
#packages/

# Windows Azure Build Output
csx
*.build.csdef

# Windows Store app package directory
AppPackages/

# Others
sql/
*.Cache
ClientBin/
[Ss]tyle[Cc]op.*
~$*
*~
*.dbmdl
*.[Pp]ublish.xml
*.pfx
*.publishsettings

# RIA/Silverlight projects
Generated_Code/

# Backup & report files from converting an old project file to a newer
# Visual Studio version. Backup files are not needed, because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm

# SQL Server files
App_Data/*.mdf
App_Data/*.ldf

#############
## Windows detritus
#############

# Windows image file caches
Thumbs.db
ehthumbs.db

# Folder config file
Desktop.ini

# Recycle Bin used on file shares
$RECYCLE.BIN/

# Mac crap
.DS_Store


#############
## Python
#############

*.py[co]

# Packages
*.egg
*.egg-info
dist/
build/
eggs/
parts/
var/
sdist/
develop-eggs/
.installed.cfg

# Installer logs
pip-log.txt

# Unit test / coverage reports
.coverage
.tox

#Translations
*.mo

#Mr Developer
.mr.developer.cfg
52 changes: 52 additions & 0 deletions ImageDownload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-

import os
import sys
import re
import urllib

url_addr = sys.argv[1]
save_path = sys.argv[2]
# url_addr = r"http://www.douban.com/"
# save_path = r"C:\Users\naril\Desktop\new"
html = urllib.urlopen(url_addr).read()


class Get_Picture(object):
def __init__(self, html_content):
self.html_content = html_content

def get_image_list(self):
"""Get all the image url from the website url,
and return a list to save it."""
image_list = []
# print self.html_content
pattern = r'<img.*?src="(.*\..{3})"/?>?' # match the image with re
match_result = re.findall(pattern, self.html_content)
# print match_result
for i in match_result:
# url = url_addr + i # ganerate the absolute address of picture
image_list.append(i)
# print image_list
self.image_list = image_list
return self.image_list

def downloader(self):
"""Download all the images into some dir"""
for each_url in self.image_list:
filename = each_url.split("/")[-1]
savedfile = os.path.join(save_path, filename)
try:
urllib.urlretrieve(each_url, savedfile)
except:
pass
print "Downloading completed!"


def run():
a = Get_Picture(html)
a.get_image_list()
a.downloader()

if __name__ == '__main__':
run()
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Designed by Ran.

Description:
Download all the pictures of some website.
1. And keep their names on the website.
2. Then print their size.
3. Download all the images and save them to one directory

Usage:
Run this program in cmd or linux bash:
"python ImageDownloader.py <url_address> <Directory>"

0 comments on commit b1a37f1

Please sign in to comment.