The zagoload module allows you to access local files and remote files – files which are accessible through the HTTP and FTP protocols – in a uniform way.

Version Python github Build Status

Installing zagoload

The zagoload module can be installed by pip:

# pip install zagoload

You can also clone zagoload github repo.

Basics

To download a URL, we will use zagoload.load(url), which returns a new FileRequest object.

  • If the file was successfully downloaded, FileRequest.valid will be True.
  • The FileRequest.source will point to the source file
  • The FileRequest.target will point to the downloaded local file on disk. It will be downloaded to the cache under a uniquely generated name
  • The FileRequest.data will contain the contents of the file

In the next example we will download http://www.google.com

import zagoload
def download(source):
  """Download `source` with zagoload and print a short summary of the result.

  On success prints the source, the local target path, and the length and
  first 15 characters of the content; otherwise prints a failure message.
  """
  import sys

  def info(ss):
    # write one line of text to stdout
    sys.stdout.write(ss + u'\n')

  ff = zagoload.load(source)
  if not ff.valid:
    # unicode literal: a byte-string template fails on Python 2 when the
    # source contains non-ASCII characters
    info(u'Failed to download {0}'.format(ff.source))
    return
  # if valid, process ff.target - the file on disk
  info(u'Download {0} => {1}'.format(ff.source, ff.target))
  # or process ff.text - the content of the file
  info(u'{0} characters : {1}'.format(len(ff.text), repr(ff.text[:15])))
download(u'http://www.google.com' )
download(u'ftp://ftp.funet.fi/pub/standards/RFC/rfc959.txt')
# python sample_01.py
Download http://www.google.com => Cache\fW\file_fWebvNyPE1OGX2RS.bin
44994 characters : '<!doctype html>'
Download ftp://ftp.funet.fi/pub/standards/RFC/rfc959.txt => Cache\sH\file_sHDLNozuH0CkIMUA.bin
147316 characters : '\n              '

Download with params

We can provide GET parameters to the URL using the params parameter. In the next example we will use this to query the Bing search engine for its top three results for “python” and “php”. We will rely on the fact that Bing takes a GET parameter named “q” for its search terms and that the results are the text enclosed in cite tags.

import re
import zagoload
def queryBing(query,count):
  """Query Bing for `query` and print at most `count` links from the result page.

  The links are taken from the text enclosed in <cite> tags, with any nested
  tags stripped. Prints a failure message if the page could not be downloaded.
  """
  import sys

  def info(ss):
    # write one line of text to stdout
    sys.stdout.write(ss + u'\n')

  def cleanTags(text):
    # drop everything that looks like an HTML tag
    return re.sub(u'<(.*?)>', u'', text)

  # download url; the search terms go into the GET parameter "q"
  ff = zagoload.load('http://www.bing.com/', params={'q': query})
  if not ff.valid:
    info(u'Failed to download {0}'.format(ff.source))
    return
  info(u'{0} => {1}'.format(ff.source, ff.target))
  info(u'Bing <{1}> - Top {0}'.format(count, query))
  reCite = '<cite>(?P<link>.*?)</cite>'
  for zz, ii in enumerate(re.finditer(reCite, ff.text, re.DOTALL), 1):
    if zz > count:
      # checked before printing, so count <= 0 prints no results at all
      break
    info(u'  {0} => {1}'.format(zz, cleanTags(ii.group('link'))))
queryBing( 'python', 3 )
queryBing( 'php'   , 3 )
# python sample_02.py
http://www.bing.com/?q=python => Cache\SS\file_SS9QoJfDt_OuZB6c.bin
Bing <python> - Top 3
  1 => https://www.python.org
  2 => https://docs.python.org
  3 => https://de.wikipedia.org/wiki/Python_(Programmiersprache)
http://www.bing.com/?q=php => Cache\09\file_09VHLZwzFwXecVvE.bin
Bing <php> - Top 3
  1 => ru.php.net
  2 => www.gentlesource.com
  3 => qa.php.net

Download to target

If target is omitted, the file will be downloaded to the cache under a uniquely generated name. If the target parameter is provided:

  • If the target starts with @, the file will be downloaded to the path that follows the @
  • If the target has no extension, the file will be downloaded to the cache under {target}_{id}.bin
  • If the target has an extension, the file will be downloaded to the cache under {target}_{id}.{extension}
import zagoload
def download(source,target):
  """Download `source` to `target` with zagoload and print where it was stored."""
  import sys

  def info(ss):
    # write one line of text to stdout
    sys.stdout.write(ss + u'\n')

  ff = zagoload.load(source, target=target)
  if not ff.valid:
    # unicode literal, consistent with the success message below
    info(u'Failed to download {0}'.format(ff.source))
    return
  info(u'{0} => {1}'.format(ff.source, ff.target))
download( 'http://www.google.com', '01.txt' )
download( 'http://www.google.com', '01' )
download( 'http://www.google.com', '@01.txt' )
# python sample_03.py
http://www.google.com => Cache\fW\01_fWebvNyPE1OGX2RS.txt
http://www.google.com => Cache\fW\01_fWebvNyPE1OGX2RS.bin
http://www.google.com => 01.txt

Caching

The cacheMode parameter defines how cached files are handled:

  • CacheMode.Enabled - check the cache for the source file. If it does not exist there, download it.
  • CacheMode.InCache - check the cache for the source file. If it does not exist there, report a failure.
  • CacheMode.Disabled - download the source file to the cache even if it already exists in the cache.
import zagoload
import os
import time
def download(source,cacheMode, cacheTime=0):
  """Download `source` with the given cache settings and print the result.

  On success prints the source, the target path and the target file's
  modification time modulo 10000; otherwise prints a failure message.
  """
  import sys

  def info(ss):
    # write one line of text to stdout
    sys.stdout.write(ss + u'\n')

  ff = zagoload.load(source, cacheMode=cacheMode, cacheTime=cacheTime)
  if not ff.valid:
    # unicode literal, consistent with the success message below
    info(u'Failed to download {0}'.format(ff.source))
    return
  # the modification time shows whether the cached file was rewritten
  mtime = os.path.getmtime(ff.target) % 10000
  info(u'{0} => {1}. Time : {2:.2f}'.format(ff.source, ff.target, mtime))
# will fail if the file is not in the cache yet: InCache only looks in the cache
download( 'http://www.google.com', zagoload.CacheMode.InCache     )
# will download from the source if the file is not in the cache
download( 'http://www.google.com', zagoload.CacheMode.Enabled     )
# will use the cached file
download( 'http://www.google.com', zagoload.CacheMode.Enabled     )
# will use the cached file
download( 'http://www.google.com', zagoload.CacheMode.InCache     )
# will download the file from the source, even though the file exists in the cache
download( 'http://www.google.com', zagoload.CacheMode.Disabled    )
# will use the cached file
download( 'http://www.google.com', zagoload.CacheMode.Enabled     )
# wait 10 seconds so that the cached file is older than the cacheTime of 5 seconds;
# the next call will therefore download the file from the source again
time.sleep(10)
download( 'http://www.google.com', zagoload.CacheMode.Enabled  , 5)
# python sample_04.py
http://www.google.com => Cache\fW\file_fWebvNyPE1OGX2RS.bin. Time : 8774.76
http://www.google.com => Cache\fW\file_fWebvNyPE1OGX2RS.bin. Time : 8774.76
http://www.google.com => Cache\fW\file_fWebvNyPE1OGX2RS.bin. Time : 8774.76
http://www.google.com => Cache\fW\file_fWebvNyPE1OGX2RS.bin. Time : 8774.76
http://www.google.com => Cache\fW\file_fWebvNyPE1OGX2RS.bin. Time : 9232.80
http://www.google.com => Cache\fW\file_fWebvNyPE1OGX2RS.bin. Time : 9232.80
http://www.google.com => Cache\fW\file_fWebvNyPE1OGX2RS.bin. Time : 9243.05

Consuming restful api

The json function allows you to consume a RESTful API. If the request was successful, FileRequest.json will contain the parsed JSON of the response.

import zagoload
def find_post_title(id):
  """Return the 'title' field of the post with the given id."""
  url = 'https://jsonplaceholder.typicode.com/posts/{0}'.format(id)
  post = zagoload.json(url).json
  return post['title']
def find_users():
  """Return the 'username' field of every user as a list."""
  users = zagoload.json('https://jsonplaceholder.typicode.com/users').json
  return [entry['username'] for entry in users]
def add_post(postdata=None):
  """POST `postdata` as a new post and return True if the response status is 200.

  `postdata` defaults to an empty dict. A fresh dict is created on every call
  so that no mutable default object is shared between calls.
  """
  if postdata is None:
    postdata = {}
  # caching is disabled so that the request is always sent to the server
  ff = zagoload.json(
    'http://jsonplaceholder.typicode.com/posts',
    action='POST',
    postdata=postdata,
    cacheMode=zagoload.CacheMode.Disabled,
  )
  return ff.rStatus == 200
def run():
  """Print the title of post 1 and all usernames, then try to add a post."""
  title = find_post_title(1)
  print(title)
  usernames = find_users()
  print(usernames)
  new_post = { 'id': 1, 'title': 'foo', 'body': 'bar', 'userId': 1 }
  added = add_post(new_post)
  if added:
    print("Added")
run()
# python sample_06.py
sunt aut facere repellat provident occaecati excepturi optio reprehenderit
['Bret', 'Antonette', 'Samantha', 'Karianne', 'Kamren', 'Leopoldo_Corkery', 'Elwyn.Skiles', 'Maxime_Nienow', 'Delphine',
 'Moriah.Stanton']
Added

Other Parameters

  • If you do not use FileRequest.data, you can set the parameter loadData to False to save memory resources.
  • You can set the retries parameter to the number of attempts that should be made before giving up and reporting that the download has failed ( FileRequest.valid is False and FileRequest.failed is True ). The default value is 2.
  • You can set contentType to the content type of the expected file (if downloaded via the HTTP protocol). If the content type is different than expected, the download will fail.
  • You can set maxCache - the time in seconds during which a file that exists in the cache folder is considered valid.

You can provide a callback function to display the download progress via the onDownload parameter. The callback should have the following signature:

def onDownloading(fileSize,downSize, downSpeed):
  # the fileSize is size of the source file in bytes
  # the downSize is the size of the part of the target file which was already downloaded, in bytes
  # the downSpeed is the download speed in bytes per second
import zagoload
def download(source):
  """Download `source` with zagoload, printing progress while it downloads.

  Each progress line shows the percentage done, downloaded/total bytes and
  the speed in kb/s. The final line reports the target path or a failure.
  """
  import sys

  def info(ss):
    # write one line of text to stdout
    sys.stdout.write(ss + u'\n')

  def onDownload(fileSize,downSize, downSpeed):
    # guard against ZeroDivisionError when fileSize is 0
    # NOTE(review): presumably possible when the total size is unknown - confirm
    percent = int(100*downSize/fileSize) if fileSize else 0
    info( u'{0:3}% - {1:8}/{2}, {3:4.0f}kb/s'.format(percent, downSize, fileSize, downSpeed/1024))

  ff = zagoload.load(source, onDownload=onDownload)
  if not ff.valid:
    # unicode literal, consistent with the success message below
    info(u'Failed to download {0}'.format(ff.source))
    return
  info(u'{0} => {1}'.format(ff.source, ff.target))
download('http://download.thinkbroadband.com/5MB.zip')
# python sample_05.py
  6% -   339968/5242880,  332kb/s
 22% -  1163264/5242880,  768kb/s
 51% -  2674688/5242880, 1431kb/s
 94% -  4935680/5242880, 2208kb/s
http://download.thinkbroadband.com/5MB.zip => Cache\CO\file_CO3FhQihPolP4gE0.bin