Description
Dukascopy (see also #153) provides tick data. For example,
http://www.dukascopy.com/datafeed/EURUSD/2016/02/14/20h_ticks.bi5
contains the ticks for 2016-02-14 (`datetime.datetime(2016, 2, 14)`)
from 08PM to 09PM.
The payload is LZMA-compressed binary data.
https://github.com/thalesians/pythalesians provides some Python code to download and process data.
https://github.com/thalesians/pythalesians/blob/4974a26c58fde1b4a86e6b683494a7ccd8fb6e2e/pythalesians/market/loaders/lowlevel/brokers/loaderdukascopy.py
They use the struct module https://docs.python.org/2/library/struct.html
but NumPy's fromfile http://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.fromfile.html
or Construct http://construct.readthedocs.org/ could also be used.
see also https://github.com/ninety47/dukascopy
http://eareview.net/tick-data/dukascopy-php-scripts
https://github.com/FX31337/FX-BT-Scripts
http://stackoverflow.com/questions/14035808/reading-data-from-dukascopy-tick-binary-file
http://stackoverflow.com/questions/30389417/not-sure-how-to-uncompress-read-result-from-binary-file
WIP:
import click
# import struct
import datetime
try:
import lzma
except ImportError:
# pip install backports.lzma
from backports import lzma
import numpy as np
import pandas as pd
from collections import OrderedDict
import requests
import requests_cache
import warnings
from pandas_datareader._utils import RemoteDataError, SymbolWarning
def _init_session(session):
if session is None:
session = requests.Session()
return session
# def chunks(lst, n):
# if n < 1:
# n = 1
# return [lst[i:i + n] for i in range(0, len(lst), n)]
def _sanitize_dates(start, end):
"""
Return (datetime_start, datetime_end) tuple
if start is None - default is 2016/02/04
if end is None - default is today
"""
start = pd.to_datetime(start)
end = pd.to_datetime(end)
lst_not_a_time = [None, pd.NaT]
if start in lst_not_a_time:
start = datetime.datetime(2016, 2, 4)
else:
start = datetime.datetime(start.year, start.month, start.day,
start.hour)
if end in lst_not_a_time:
end = start
else:
end = datetime.datetime(end.year, end.month, end.day, end.hour)
return start, end
def _sanitize_symbol(symb):
return symb.replace("/", "").upper()
def read(symbols, start, stop=None, session=None):
    """Fetch Dukascopy tick data for every symbol in *symbols*.

    Returns a single DataFrame when one symbol is requested, otherwise a
    multi-symbol pandas structure (see _read_several_several_symbols).
    """
    return _read_several_several_symbols(
        _read_one_symbol, symbols, start, stop, session)
def _read_several_several_symbols(fct_read, symbols, start, stop=None, session=None):
if len(symbols) == 1:
df = fct_read(symbols[0], start, stop, session)
return df
else:
d_df_symb = OrderedDict()
failed = []
for symb in symbols:
try:
d_df_symb[symb] = fct_read(symb, start, stop, session)
except RemoteDataError:
msg = 'Failed to read symbol: {0!r}'
warnings.warn(msg.format(symb), SymbolWarning)
failed.append(symb)
return pd.Panel(d_df_symb).swapaxes('items', 'minor')
def _read_one_symbol(symb, start, stop, session):
    """Download and concatenate the hourly tick files for one symbol.

    *start*/*stop* are passed through _sanitize_dates, then one chunk is
    fetched per hour in [start, end] and concatenated along the index.
    """
    start, end = _sanitize_dates(start, stop)
    # BUG FIX: the hourly grid must be built from the sanitized *end*,
    # not the raw *stop* argument (which may be None, or a string that
    # was not truncated to the hour).
    dt_chunks = pd.date_range(start, end, freq="1H")
    lst = [_read_chunk(symb, dt_chunk, session) for dt_chunk in dt_chunks]
    return pd.concat(lst, axis=0)
def _read_chunk(symb, dt_chunk, session=None):
    """Download and parse one hourly ``.bi5`` tick file from Dukascopy.

    Parameters
    ----------
    symb : str
        Currency pair such as 'EURUSD' (sanitized here, so 'eur/usd' works).
    dt_chunk : datetime-like
        Hour to fetch; its year/month/day/hour build the URL.
    session : requests.Session, optional

    Returns
    -------
    pd.DataFrame indexed by tick timestamp with columns Ask, Bid,
    AskVolume, BidVolume; an empty frame when the hour has no ticks.

    Raises
    ------
    RemoteDataError when the HTTP request does not return 200.
    """
    session = _init_session(session)
    symb = _sanitize_symbol(symb)
    base_url = "http://www.dukascopy.com/datafeed"
    endpoint = "/%s/%04d/%02d/%02d/%02dh_ticks.bi5" \
        % (symb, dt_chunk.year, dt_chunk.month, dt_chunk.day, dt_chunk.hour)
    url = base_url + endpoint
    # Example:
    # http://www.dukascopy.com/datafeed/EURUSD/2016/02/14/20h_ticks.bi5
    response = session.get(url)
    if response.status_code != 200:
        raise RemoteDataError("Can't download %r using %r" % (symb, url))
    compressed_data = response.content
    columns = ["Date", "Ask", "Bid", "AskVolume", "BidVolume"]
    if not compressed_data:
        # Dukascopy serves an empty body for hours with no ticks.
        return pd.DataFrame(columns=columns).set_index("Date")
    raw_data = lzma.decompress(compressed_data)
    # Each 20-byte record is big-endian (struct format ">LLLff"):
    # 3 unsigned longs (ms offset into the hour, ask, bid as scaled
    # integers) followed by 2 floats (ask/bid volumes).
    record_dtype = np.dtype([
        ('Date', '>u4'),
        ('Ask', '>u4'),
        ('Bid', '>u4'),
        ('AskVolume', '>f4'),
        ('BidVolume', '>f4'),
    ])
    # BUG FIX: np.fromstring is deprecated (and removed for binary input
    # in recent NumPy); np.frombuffer is the supported, zero-copy
    # equivalent.
    data = np.frombuffer(raw_data, record_dtype)
    df = pd.DataFrame(data, columns=columns)
    # Prices are stored as scaled integers: JPY-quoted pairs use 3
    # decimal digits, all the others 5.
    p_digits = 3 if symb[3:] == "JPY" else 5
    for p in ["Ask", "Bid"]:
        df[p] = df[p] / 10 ** p_digits
    # 'Date' holds milliseconds since the start of the requested hour.
    df["Date"] = dt_chunk + pd.to_timedelta(df["Date"], unit="ms")
    return df.set_index("Date")
# Command-line entry point.
@click.command()
#@click.option('--symb', default="EURUSD,USDJPY", help='Symbol.')
@click.option('--symb', default="EURUSD", help='Symbol.')
@click.option('--start', default="2016-02-15 00:00:00", help='Start.')
@click.option('--stop', default="2016-02-15 23:00:00", help='Stop.')
def main(symb, start, stop):
    """Download Dukascopy ticks for the comma-separated --symb list over
    [--start, --stop] and print the resulting pandas object.

    HTTP responses are cached on disk via requests_cache so repeated
    runs do not re-download the same hourly files.
    """
    pd.set_option("max_rows", 10)
    # '--symb' may hold several comma-separated pairs -> split into a list.
    symb = symb.split(",")
    expire_after = datetime.timedelta(days=3)
    # On-disk HTTP cache (sqlite file 'cache'); entries expire after 3 days.
    session = requests_cache.CachedSession(cache_name='cache',
                                           backend='sqlite',
                                           expire_after=expire_after)
    df = read(symb, start, stop, session=session)
    print(df)


if __name__ == '__main__':
    main()