Merge pull request #722 from suminb/feature/naver-reader

bashtage · web-flow · commit ee4a0874bde2 · 2020-07-07T10:43:30.000+01:00
Daily historical data from Naver Finance
diff --git a/docs/source/readers/index.rst b/docs/source/readers/index.rst
@@ -14,6 +14,7 @@ Data Readers
    iex
    moex
    nasdaq-trader
+   naver
    oecd
    quandl
    stooq
diff --git a/docs/source/readers/naver.rst b/docs/source/readers/naver.rst
@@ -0,0 +1,8 @@
+Naver Finance
+-------------
+
+.. py:module:: pandas_datareader.naver
+
+.. autoclass:: NaverDailyReader
+   :members:
+   :inherited-members: read
diff --git a/docs/source/remote_data.rst b/docs/source/remote_data.rst
@@ -41,6 +41,7 @@ Currently the following sources are supported:
     - :ref:`Nasdaq Trader symbol definitions<remote_data.nasdaq_symbols>`
     - :ref:`Stooq<remote_data.stooq>`
     - :ref:`MOEX<remote_data.moex>`
+    - :ref:`Naver Finance<remote_data.naver>`
 
 It should be noted, that various sources support different kinds of data, so not all sources implement the same methods and the data elements returned might also differ.
 
@@ -685,3 +686,19 @@ The Moscow Exchange (MOEX) provides historical data.
    import pandas_datareader.data as web
    f = web.DataReader('USD000UTSTOM', 'moex', start='2017-07-01', end='2017-07-31')
    f.head()
+
+.. _remote_data.naver:
+
+Naver Finance Data
+==================
+`Naver Finance <https://finance.naver.com>`_ provides Korean stock market
+(`KOSPI`_, `KOSDAQ`_) historical data.
+
+.. ipython:: python
+
+   import pandas_datareader.data as web
+   df = web.DataReader('005930', 'naver', start='2019-09-10', end='2019-10-09')
+   df.head()
+
+.. _KOSPI: https://en.wikipedia.org/wiki/KOSPI
+.. _KOSDAQ: https://en.wikipedia.org/wiki/KOSDAQ
diff --git a/pandas_datareader/data.py b/pandas_datareader/data.py
@@ -30,6 +30,7 @@
 )
 from pandas_datareader.moex import MoexReader
 from pandas_datareader.nasdaq_trader import get_nasdaq_symbols
+from pandas_datareader.naver import NaverDailyReader
 from pandas_datareader.oecd import OECDReader
 from pandas_datareader.quandl import QuandlReader
 from pandas_datareader.stooq import StooqDailyReader
@@ -44,6 +45,7 @@
 from pandas_datareader.yahoo.options import Options as YahooOptions
 from pandas_datareader.yahoo.quotes import YahooQuotesReader
 
+
 __all__ = [
     "get_components_yahoo",
     "get_data_enigma",
@@ -364,6 +366,7 @@ def DataReader(
         "av-monthly-adjusted",
         "av-intraday",
         "econdb",
+        "naver",
     ]
 
     if data_source not in expected_source:
@@ -662,6 +665,16 @@ def DataReader(
             session=session,
         ).read()
 
+    elif data_source == "naver":
+        return NaverDailyReader(
+            symbols=name,
+            start=start,
+            end=end,
+            retry_count=retry_count,
+            pause=pause,
+            session=session,
+        ).read()
+
     else:
         msg = "data_source=%r is not implemented" % data_source
         raise NotImplementedError(msg)
diff --git a/pandas_datareader/naver.py b/pandas_datareader/naver.py
@@ -0,0 +1,155 @@
+from datetime import datetime
+from xml.etree import ElementTree
+
+import numpy as np
+from pandas import DataFrame, to_datetime
+from pandas_datareader.base import _DailyBaseReader
+from six import string_types
+
+
+class NaverDailyReader(_DailyBaseReader):
+    """Fetches daily historical data from Naver Finance.
+
+    The server is asked for a number of recent trading days and the result is
+    filtered to the ``start``..``end`` range before it is returned.
+
+    :param symbols: A single symbol; multiple symbols are not currently supported.
+    :param start: Start of the requested date range.
+    :param end: End of the requested date range.
+    :param retry_count: Passed through to the base reader.
+    :param pause: Passed through to the base reader.
+    :param session: Passed through to the base reader.
+    :param adjust_price: Not implemented
+    :param ret_index: Not implemented
+    :param chunksize: Passed through to the base reader.
+    :param interval: Not implemented
+    :param get_actions: Stored and exposed via the ``get_actions`` property;
+        it has no effect on the fetched data.
+    :param adjust_dividends: Not implemented
+    :raises NotImplementedError: If ``symbols`` is not a single string.
+    """
+
+    def __init__(
+        self,
+        symbols=None,
+        start=None,
+        end=None,
+        retry_count=3,
+        pause=0.1,
+        session=None,
+        adjust_price=False,
+        ret_index=False,
+        chunksize=1,
+        interval="d",
+        get_actions=False,
+        adjust_dividends=True,
+    ):
+        if not isinstance(symbols, string_types):
+            raise NotImplementedError("Bulk-fetching is not implemented")
+
+        super(NaverDailyReader, self).__init__(
+            symbols=symbols,
+            start=start,
+            end=end,
+            retry_count=retry_count,
+            pause=pause,
+            session=session,
+            chunksize=chunksize,
+        )
+
+        # Backing field for the ``get_actions`` property below.
+        self._get_actions = get_actions
+
+        # Browser-like request headers; the referer points at the chart page
+        # of the requested symbol.
+        self.headers = {
+            "Sec-Fetch-Mode": "no-cors",
+            "Referer": "https://finance.naver.com/item/fchart.nhn?code={}".format(
+                symbols
+            ),
+            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",  # noqa
+        }
+
+    @property
+    def get_actions(self):
+        """Value of the ``get_actions`` constructor argument."""
+        return self._get_actions
+
+    @property
+    def url(self):
+        """URL of the Naver chart data endpoint."""
+        return "https://fchart.stock.naver.com/sise.nhn"
+
+    def _get_params(self, symbol):
+        """Build the query parameters for one symbol.
+
+        :param symbol: Symbol to request.
+        :return: Query parameters for the chart endpoint.
+        :rtype: dict
+        """
+        # NOTE: The server does not take start, end dates as inputs; it only
+        # takes the number of trading days as an input. To circumvent this
+        # pitfall, we calculate the number of business days between self.start
+        # and the current date. And then we filter by self.end before returning
+        # the final result (in _read_one_data()).
+        days = np.busday_count(self.start.date(), datetime.now().date())
+        # busday_count() leaves out the end date, so one more row is requested
+        # to keep self.start in the window when a row for today already exists
+        # (assuming the server returns the latest ``count`` rows). Surplus
+        # rows are removed by the date filter. A start date in the future
+        # would otherwise yield a non-positive count.
+        count = max(int(days) + 1, 1)
+        params = {
+            "symbol": symbol,
+            "timeframe": "day",
+            "count": count,
+            "requestType": 0,
+        }
+        return params
+
+    def _read_one_data(self, url, params):
+        """Read one data from specified symbol.
+
+        The price and volume columns hold the fields exactly as they were split
+        from the XML attribute (strings); no numeric conversion is applied.
+
+        :param url: URL to request.
+        :param params: Query parameters, see _get_params().
+        :return: Rows indexed by ``Date`` and limited to ``self.start`` ..
+            ``self.end`` (both inclusive).
+        :rtype: DataFrame
+        """
+        resp = self._get_response(url, params=params)
+        parsed = self._parse_xml_response(resp.text)
+        prices = DataFrame(
+            parsed, columns=["Date", "Open", "High", "Low", "Close", "Volume"]
+        )
+        prices["Date"] = to_datetime(prices["Date"])
+        prices = prices.set_index("Date")
+
+        # NOTE: See _get_params() for explanations.
+        return prices[(prices.index >= self.start) & (prices.index <= self.end)]
+
+    def _parse_xml_response(self, xml_content):
+        """Parses XML response from the server.
+
+        An example of response:
+
+            <?xml version="1.0" encoding="EUC-KR" ?>
+            <protocol>
+                <chartdata symbol="005930" name="Samsung Electronics" count="500"
+                        timeframe="day" precision="0" origintime="19900103">
+                    <item data="20170918|218500|222000|217000|220500|72124" />
+                    <item data="20170919|218000|221000|217500|219000|62753" />
+                    ...
+            </protocol>
+
+        :param xml_content: XML document as returned by the server.
+        :return: Generator yielding, for each ``item`` element, the list of
+            strings obtained by splitting its ``data`` attribute on ``|``.
+        """
+        root = ElementTree.fromstring(xml_content)
+        items = root.findall("chartdata/item")
+
+        for item in items:
+            yield item.attrib["data"].split("|")
diff --git a/pandas_datareader/tests/test_naver.py b/pandas_datareader/tests/test_naver.py
@@ -0,0 +1,33 @@
+from datetime import datetime
+
+from pandas_datareader import DataReader
+import pytest
+
+
+class TestNaver(object):
+    """Tests for the ``naver`` data source of DataReader."""
+
+    @pytest.mark.parametrize(
+        "symbol, start, end",
+        [
+            ("005930", (2019, 10, 1), (2019, 10, 7)),
+            ("000660", (2018, 1, 1), (2018, 12, 31)),
+            ("069500", (2017, 6, 3), (2018, 9, 9)),
+        ],
+    )
+    def test_naver_daily_reader(self, symbol, start, end):
+        """Returned rows have five columns and stay within start..end."""
+        start = datetime(*start)
+        end = datetime(*end)
+        reader = DataReader(symbol, "naver", start, end)
+
+        assert reader.shape[1] == 5
+        assert reader.index.min() >= start
+        assert reader.index.max() <= end
+
+    def test_bulk_fetch(self):
+        """A list of symbols is rejected by the naver reader."""
+        # The data source must be named; without it the call cannot reach
+        # the naver reader and the bulk-fetch check is never exercised.
+        with pytest.raises(NotImplementedError):
+            DataReader(["005930", "000660"], "naver")

-Original file line number
+Diff line change
    iex
    moex
    nasdaq-trader
 +   naver
    oecd
    quandl
    stooq