function FlickrDownload(Cat_Name,Out_Dir,Number_Images, display)
% FLICKRDOWNLOAD  Download images from the Flickr tag-search web pages.
%
%   FlickrDownload(Cat_Name, Out_Dir, Number_Images)
%   FlickrDownload(Cat_Name, Out_Dir, Number_Images, display)
%
% Inputs:
%   Cat_Name      - search term; spaces are replaced with '-' before it is
%                   placed in the URL.
%   Out_Dir       - directory the images are written to, as
%                   image_00001.jpg, image_00002.jpg, ...  Created if it
%                   does not exist. Numbering continues after any
%                   image*.jpg files already present.
%   Number_Images - maximum number of images to save (must be < 10000).
%   display       - optional figure number; when non-zero each saved image
%                   is shown in that figure. Default 0 (no display).
%
% Usage example: FlickrDownload('cars','/tmp/cars',200)
%
% This will search for up to 200 cars and put them into /tmp/cars.
% Requires the LYNX browser and a Unix-style shell (mkdir -p, '>').
%
% Issues: the per-image connect timeout is only 3 seconds, but lynx can
% still stall on an unreachable URL for a while before giving up.
% Flickr only seems to provide 13 pages of 24 images, so we are looking at
% roughly 300 images per dataset.
%
% Version 2.0 improvements:
%  (a) Output directory is now created if it wasn't present (before it
%      would just give 0 images per page in error)
%  (b) It now checks to see if you have write permission to the output
%      directory - before you'd get the 0 images per page problem.
%
% R.Fergus 30/9/03

if (Number_Images>=10000)
    error('Too many images - max is 9999');
end

if (nargin<4)
    display = 0;
end

Per_Page  = 24;   % thumbnails listed on one result page
Max_Pages = 13;   % Flickr only serves this many result pages
Max_Tries = 5;    % attempts at fetching one result page before giving up

%%% Both arguments are spliced into shell command lines inside double
%%% quotes, so refuse the characters that stay active in that context.
unsafe = ['"`$\', char(10)];
if any(ismember(Cat_Name, unsafe)) || any(ismember(Out_Dir, unsafe))
    error('Cat_Name and Out_Dir must not contain quote, backtick, dollar, backslash or newline characters');
end

%%% Create the output directory if it isn't present. The exit status is
%%% deliberately ignored: the write test below is the real check.
[status, shellOut] = system(['mkdir -p "', Out_Dir, '"']);

%%% Check we can write to it (save and read back in a file)
testFile = [Out_Dir,'/test.mat'];
try
    probe = rand(2,2);
    save(testFile,'probe');
    readBack = load(testFile);
    delete(testFile);
catch
    error('Output directory could not be created');
end

%%% Continue numbering after any images left by a previous run
offset = length(dir([Out_Dir,'/image*.jpg']));
if offset > 0
    fprintf('%d images already in output directory\n',offset);
end

count      = 1;   % index of the next image to be saved
page_count = 1;   % result page currently being processed
skip       = 0;   % downloads discarded as unusable
attempted  = 0;   % image downloads actually attempted

%%% Warnings are silenced while downloading; the previous state is put
%%% back at the end of the function (not on an error exit).
warnState = warning;
warning off

fprintf('Searching Flickr for %s images....\n',Cat_Name);

%% Flickr doesn't like spaces so replace space with -
Cat_Name = strrep(Cat_Name,' ','-');

while (count<=Number_Images) && (page_count<=Max_Pages)

    fprintf('Looking at page: %d\t',page_count);

    % NOTE(review): the tag segment 'patterns' is hard-coded in this URL
    % and Cat_Name is used as the cluster name - confirm this is intended.
    url = ['"http://www.flickr.com/photos/tags/patterns/clusters/',Cat_Name,'/page',num2str(page_count),'"'];

    %%% Fetch the page source. Anything shorter than 500 characters is
    %%% treated as a failed fetch and retried, but only Max_Tries times so
    %%% a missing lynx or an unreachable site cannot hang the function.
    t = '';
    tries = 0;
    while (length(t)<500) && (tries<Max_Tries)
        [status, t] = system(['lynx -source ',url]);
        tries = tries + 1;
        pause(1);
    end
    if length(t)<500
        fprintf('\nPage %d could not be retrieved after %d attempts - stopping\n',page_count,Max_Tries);
        break;
    end

    %%% Thumbnail URLs run from a 'http://static.flickr.com' occurrence up
    %%% to the following '_t.jpg'. The start of thumbnail b is st(b+1)
    %%% (pairing kept from the original code), so at most length(st)-1
    %%% links can be formed without indexing past the end of st.
    st = strfind(t,'http://static.flickr.com');
    ed = strfind(t,'_t.jpg');
    nLinks = min([length(st)-1, length(ed), Per_Page]);

    link = cell(Per_Page,1);
    for b = 1:nLinks
        link{b} = t(st(b+1):ed(b)-1);
    end

    count_old = count;

    for c = 1:nLinks

        % Stop as soon as the requested number of images has been saved
        if count > Number_Images
            break;
        end

        % An empty link arises when the start marker lies after the end marker
        if isempty(link{c})
            continue;
        end

        % The link text comes from the downloaded page and goes into a
        % shell command, so drop anything holding shell-active characters.
        if any(ismember(link{c}, unsafe))
            skip = skip + 1;
            continue;
        end

        imgFname = sprintf('%s/image_%05d.jpg', Out_Dir, count+offset);

        % Appending '.jpg' to the link (without its '_t' suffix) requests
        % the non-thumbnail version of the image.
        cmd = ['lynx -connect_timeout=3 -source "',link{c},'.jpg" > "',imgFname,'"'];
        [status, shellOut] = system(cmd);
        attempted = attempted + 1;
        pause(3);

        %%% Keep the file only if it decodes as an image; otherwise remove
        %%% it so the same file name is reused by the next download.
        usable = 1;
        try
            im = imread(imgFname);
        catch
            usable = 0;
        end
        if ~usable
            if exist(imgFname,'file')
                delete(imgFname);
            end
            skip = skip + 1;
            continue;
        end

        count = count + 1;

        if (display)
            figure(display)
            imshow(im)
            title(['image: ', num2str(count-1), ' ', num2str(size(im, 2)), 'x', num2str(size(im, 1))])
            drawnow
        end

    end

    fprintf('%d good images found\n',count-count_old);

    page_count = page_count + 1;

end

fprintf('Total images saved: %d\t Total downloaded: %d\t Total unusable: %d\n',count-1,attempted,skip);

warning(warnState);