%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                                                               %%
%% This is the mc_template.tex file for the mc document class.   %%
%% It is used to prepare a manuscript for Mathematical 			 %%
%% Communications journal.                                       %%
%%                                                               %%
%% The mc.cls class works only with a pdflatex engine.           %%
%% The file newmc.cls should be placed where LaTeX 			     %%
%% can find it, e.g. in the current working directory.		     %%
%%                                                               %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\documentclass{mc}

%%===============================================================%%
%% Please add here your own packages, macros and environments.   %%
%% It is not necessary to include ams* and graphicx packages     %%
%% since they are automatically included by the mc class.        %%
%% Avoid defining your own environments and use the already      %%
%% defined ones (e.g.~theorem, lemma etc.)                       %%
%%===============================================================%%

%\usepackage{enumerate}  % uncomment to use this package
%\newcommand{\E}{\mathbb{E}} % example of a macro

%%===============================================================%%


%%===============================================================%%
%% Journal info will be edited by the typesetter 				 %%
%% DO NOT CHANGE THIS PART						                 %%
%%===============================================================%%
\setcounter{page}{1}
\renewcommand\thisnumber{1}
\renewcommand\thisyear {2026}
\renewcommand\thismonth{xxx}
\renewcommand\thisvolume{31}
\renewcommand\datereceived{March 25, 2025}
\renewcommand\dateaccepted{September 1, 2025}
\renewcommand\doinum{10.1000/100}
%%===============================================================%%%%%%% author macros %%%%%%%%%
% place your own macros HERE
\usepackage{mathrsfs} % the command \mathscr from the package mathrsfs only support capital letters...
\usepackage{enumerate}% http://ctan.org/pkg/enumerate
%\usepackage{enumitem} % to have 1.1  1.2  ....
\usepackage{dsfont} % \mathds from dsfont package 
%\usepackage{refcheck}
%Datum zaprimanja rada 25.3.2025.

% Datum prihvaćanja rada 1.9.2025.

\begin{document}
%\begin{linenumbers}
%%===============================================================%%
%% TITLE                                                         %%
%% Please add the title with \title[Short title]{Title}          %%
%% Short title is a running head appearing in the header.        %%
%%===============================================================%%
\title[Statistical estimation of Shannon entropy]	% at most 50 characters including spaces
		{Notes about statistical estimation of Shannon entropy} 	% at most 150 characters including spaces ()
%%===============================================================%%


%%===============================================================%%
%% AUTHOR(S)                                                     %%
%%                                                               %%
%% Add author's details in the following format. For each author %%
%% provide the affiliation, address and Orcid identifier.		 %%
%% Mark the corresponding author with \comma\corrauth.			 %%
%%===============================================================%%
\author[Y.~Sun] % put here short author for header
	{Yu Sun{\affil{1}\comma\affil2}\orcidnumber{0000-0003-3127-8358}
      % if Second and Third authors share the same affiliation
	}		 

\address{\affilnum1 Systemic Risk Centre, London School of Economics and Political Science, London WC2A\,2AE, UK\\
		 \affilnum2 Department of Economics and Management, University of Trento, 38\,122 Trento, Italy
		} 

\emailsingle{%
	\email{y.u.sun@outlook.com}
		}

%% For single author use the following format. 				     %%
%\author[F.~Author]%
%	{First Author\orcidnumber{0000-0000-0000-0000}
%	}		 
%
%\address{Affiliation 1} 
%
%\emailsingle{%
%	\email{fauthor@mathos.hr}
%		}
%%===============================================================%%


%%===============================================================%%
%% ABSTRACT                                                      %%
%%===============================================================%%
\begin{abstract}
We improve the method of Bulinski and Dimitrov (2019) to prove the  $ L^{2} $-consistency of the  Kozachenko-Leonenko estimator for  (differential) Shannon entropy.
\end{abstract}
%%===============================================================%%


%%===============================================================%%
%% KEYWORDS                                                      %%
%%===============================================================%%
\keywords{Shannon differential entropy; nearest neighbor graphs; unbiasedness; consistency; Kozachenko-Leonenko estimate; $L^{2} $-consistency}
%%===============================================================%%


%%===============================================================%%
%% AMS subject classification                                    %%
%%===============================================================%%
\ams{60F25, 62G05, 62G20, 62H12}
%%===============================================================%%


%%===============================================================%%
\maketitle
%%===============================================================%%





%%===============================================================%%
%% MAIN BODY                                                     %%
%%===============================================================%%

\section{Introduction}
Shannon entropy is a fundamental concept in information theory that has numerous applications in fields like statistics, probability, and combinatorics. %The (differential) entropy of a random vector X with density function f is defined as:
Let $X\in\mathbb{R}^{d}$ be a random vector defined on a probability space $(\Omega , \mathscr{F}, P) $. Suppose that the joint distribution of $ X $ has a density $ f(x) $ with respect to the Lebesgue measure $ dx $, with the support $\mathcal{S}=\mathcal{S}(f)=\{x\in\mathbb{R}^{d}: f(x)>0\} $. 
Consider $ h(X)=-\log f(X) $,
%\begin{eqnarray} \label{log variable}
%h(X)&=&-\log f(X)\,,
%\end{eqnarray}
which can be thought of as the (random) information content of $ X $ (or as log-likelihood). The average value of information content of $ X $ is known as Shannon (or Boltzmann-Gibbs) entropy,
\begin{eqnarray}\label{shannon entropy}
H(X):=\mathsf{E}\big[h(X)\big]=-\int_{\mathcal{S}}{f(x)\log f(x)}dx \,.
%&=& \mathrm{E}(I^{S}(X)) \nonumber
\end{eqnarray}
In \cite{Kozachenko87}, Kozachenko and Leonenko construct a popular nonparametric (differential) entropy estimator, the so-called Kozachenko-Leonenko (K-L) estimator $ H_{N} $, based on the nearest neighbor (NN) graph. 
Let $ X_{1},\ldots ,X_{N} $ be i.i.d. random vectors having the same law as the random vector $ X\in\mathbb{R}^{d} $. %Assume that $ X $ has a density $ f $ with respect to the lebesgue measure $ dx $ in $ \mathbb{R}^{d} $ {\color{red} This has been already said at the beginning}. 
For $ N\geq 2 $ and each $ i=1,\ldots , N $, set $ \rho_{i}=\min\big\{\rho(X_{i},X_{j}) : j\in\{1,\ldots,N\}\setminus\{i\} \big\} $, where $ \rho(x,y)=\lVert x-y\rVert $ denotes the \textit{Euclidean distance} between $ x,y\in\mathbb{R}^{d} $. In other words, $\rho_{i}$ is the distance from $ X_{i} $ to its nearest neighbor (NN) in the sample $ \{X_{1},\ldots,X_{N}\}\setminus\{X_{i}\} $, and $ \{\rho_{i}, i=1,\ldots , N\} $ defines the NN random graph.
Recall from \cite{Kozachenko87} that the estimator of the entropy $ H(X) $ defined in equation \eqref{shannon entropy} is given by the formula
\begin{align*}
&&\hspace{.4cm} H_{N}=&\dfrac{1}{N}\sum\limits_{i=1}^{N}\zeta_{i}(N),% \label{HN zeta}\nonumber 
\\
&&\hspace{-1cm}\text{with}\,\,\zeta_{i}(N)=&\log \big[{\rho_{i}}^{d}V_{d}\,e^{\gamma}(N-1)\big]\,,\,\, i=1,\ldots, N,% \label{zeta_i N}\nonumber 
%,\,\text{and the constant}\,\,\widetilde{\gamma}=e^{\gamma}\approx 1.781 \nonumber \\
%&&\hspace{-1cm}\text{with \textit{Euler constant}}\,\gamma=-\int_{0}^{\infty} e^{-t}\log t\,d{t} \approx 0.5772 \label{Euler constant}
\end{align*}
where $ V_{d}:={\pi^{d/2}}/{\Gamma\big(\frac{d}{2}+1\big)}$ and $ \gamma=-\int_{0}^{\infty} e^{-t}\log t\,d{t} \approx 0.5772 $ are the volume of the unit ball in $ \mathbb{R}^{d} $ and the Euler--Mascheroni constant, respectively.

\section{Main results}
In \cite{Bulinski19}, Bulinski and Dimitrov propose using an analogue of the Hardy-Littlewood maximal functions for an investigation of unbiasedness and consistency of $ H_{N} $. 
They provide   an interesting and detailed proof method; however, some errors and shortcomings emerged. In this paper, we correct the errors, refine the proof method, and propose a more rigorous and general proof framework. This framework can be applied not only to the entropy proof of the NN but also to the entropy proof of $ k $-th nearest neighbor ($ k $-NN) graphs, as well as to VarEntropy and high-order statistics of entropy estimators.
Before stating the main results, some more definitions are needed. Let $ B(x,r)=\{y\in\mathbb{R}^{d}: \rho(x,y)< r\} $ be the open ball of radius $ r>0 $ centered at $ x\in\mathbb{R}^{d} $. Clearly, $ |B(x,r)| = \mu\big(B(x,r)\big)=r^{d}\,V_{d} $, where $ \mu $ denotes the Lebesgue measure on $ \mathbb{R}^{d} $. Let $ G(t) $ be the nondecreasing function on $ [0,\infty) $ defined by
\begin{eqnarray}\label{G(t)}\nonumber
G(t) = \begin{cases}
0, & 0 \leq t < 1, \\[2\jot]
t\log t, & t\geq 1\,.
\end{cases}
\end{eqnarray}
%Following Bulinski and Dimitrov (2019) \cite{Bulinski19}, we introduce
Recall the following functionals:
\begin{eqnarray}
&& I_{f}(x,r)=\dfrac{\int_{B(x,r)}f(y)dy}{r^{d}V_{d}},%\label{I_f(x,r)}\nonumber \\
%&&
\,\,\, M_{f}(x,R)=\sup\limits_{r\in(0,R]}I_{f}(x,r) \quad \text{and}
\quad m_{f}(x,R)=\inf\limits_{r\in(0,R]}I_{f}(x,r) \label{M_f e m_f}\nonumber.
\end{eqnarray}
It is known %(see Bulinski and Dimitrov 2019\cite{Bulinski19}) 
that the function $   I_{f}(x,r)$ is continuous in $ (x,r)\in \mathbb{R}^{d}\times (0,\infty) $, while for each $ R>0 $, the functions $ m_{f}(\cdot,R) $ and $ M_{f}(\cdot,R) $ are upper semicontinuous and lower semicontinuous, respectively.
Hence, these non-negative functions are Borel measurable. Clearly, for each $ x\in\mathbb{R}^{d} $, the function $ m_{f}(x,\cdot) $ is nonincreasing and $ M_{f}(x,\cdot) $ is nondecreasing. Note in passing that replacing $ \sup_{r\in(0,R]} $ by $ \sup_{r\in(0,\infty)} $ in the definition of $M_{f}(x,R)  $ leads to the celebrated Hardy--Littlewood maximal function $ M_{f}(x) $ that is widely used in harmonic analysis. The main results in \cite{Bulinski19} rely on the following assumption.
\begin{assumption}\label{A1}
	For a continuous density $ f $ in $ \mathbb{R}^{d} $, given positive $ \varepsilon_{0}$,
	$ \varepsilon_{1}$, $ \varepsilon_{2}$, $R_{1}$, $R_{2}$ and $ \alpha\in\{1,2\} $, it holds that
	%we define the following functions with values in $ [0,\infty) $. % in $ [0,\infty] $. %where $ \varepsilon_{i}>0, R_{j}>0 $, with $ i=0,1,2 $ and $ j=1,2 $. {\color{red} $ \alpha = 1,2,3,4 $}
	\begin{align*}
	&&\hspace{-.6cm} K_{f}(\varepsilon_{0},\alpha):=&\int_{\mathbb{R}^{d}}\bigg(\int_{\mathbb{R}^{d}\setminus\{x\}}G\big(\lvert\log^{\alpha}\rho(x,y)\rvert\big)f(y)dy\bigg)^{1+\varepsilon_{0}}f(x)dx <\infty\,,%\label{condition K}\nonumber
	\\
	%&&\hspace{-.6cm}\text{where}\,\, {\color{black} \alpha = 1,2,3,4 }  \nonumber\\
	&&\hspace{-.6cm} Q_{f}(\varepsilon_{1},R_{1}):=&\int_{\mathbb{R}^{d}}M^{\varepsilon_{1}}_{f}(x,R_{1})f(x)dx <\infty,%  \label{condition Q}\nonumber
	\\
	&&\hspace{-.6cm} T_{f}(\varepsilon_{2},R_{2}):=&\int_{\mathbb{R}^{d}}m^{-\varepsilon_{2}}_{f}(x,R_{2})f(x)dx <\infty. %\,.\label{condition T}\nonumber
	\end{align*}
\end{assumption}
%The main results of Bulinski and Dimitrov (2019) \cite{Bulinski19} are
%%\begin{tcolorbox}
\begin{theorem}\label{result I}
Under Assumption~\ref{A1} with $ \alpha=1 $, the estimator $ H_{N}  $ is asymptotically unbiased, i.e.
\begin{eqnarray}
\mathsf{E}(H_{N})\rightarrow H\,,\qquad N\rightarrow\infty\,. \nonumber%\label{Varentropy exp eq1} 
\end{eqnarray}
\end{theorem}
%%\end{tcolorbox}
%%\begin{tcolorbox}
\begin{theorem}\label{result II}
Under Assumption~\ref{A1} with $ \alpha=2 $, the estimator $ H_{N}  $ is $ L^{2} $-consistent, i.e.
\begin{eqnarray} %\label{Varentropy var eq1}
\mathsf{E}\big(H_{N}-H\big)^{2} \rightarrow \,0 \,,\qquad N\rightarrow\infty .\nonumber
%\mathsf{Var}(V_{N}^{2})\,\xrightarrow[\text{}]{N\rightarrow \infty}\,0 \,,\quad x\in A\,.
\end{eqnarray}
\end{theorem}
%The proofs of \textit{Theorem} \ref{result I} and \textit{Theorem} \ref{result II} are provided in Bulinski and Dimitrov (2019)\cite{Bulinski19}. 
The proofs of these two theorems, while drawing inspiration from the classical approach in \cite{Kozachenko87}, innovatively incorporate the Hardy--Littlewood maximal function, widely used in harmonic
analysis. However, the proof of Theorem~\ref{result II} lacks rigor and contains errors. We have made the following corrections and refinements to address these issues.
%\section{Proof of \textit{Theorem} \ref{result I}}
%The proof of \textit{Theorem} \ref{result I} is provided in \textit{Section 3 Proof of Theorem 2.3} between page 2 and page 31 of Bulinski and Dimitrov (2019)\cite{Bulinski19}.
%Note from   equation between Eq.(3.27) and (3.28) on page 27 that

The proof of Theorem~\ref{result II} (i.e., the $ L^{2} $-consistency of $ H_{N} $) is provided in Section~4, ``Proof of Theorem~2.8'', of \cite{Bulinski19}.
Note that, given the asymptotic unbiasedness of $ H_{N} $ (Theorem~\ref{result I}), the $ L^{2} $-consistency of $ H_{N} $ is guaranteed by $ \mathsf{Var}(H_{N})\to 0$ as $ N\rightarrow \infty$, where
\begin{eqnarray}\label{Var H_N}
\mathsf{Var}\big(H_{N}\big)%&=&\mathsf{Var}\bigg[ \dfrac{1}{N}\sum\limits_{i=1}^{N}\tilde{\zeta}_{i}^{2}(N)\bigg] \nonumber\\
=\dfrac{1}{N}\mathsf{Var}\big[\zeta_{1}(N)\big] +\dfrac{2}{N^{2}} \sum\limits_{1\leq i < j\leq N}\mathsf{Cov}\big[\zeta_{i}(N)\,,\,\zeta_{j}(N)\big]\,. \nonumber
\end{eqnarray}
The proof is composed of two steps. 
  Step 1 and Step 2 establish that the variance and covariance terms, respectively, vanish asymptotically, i.e.\ $\dfrac{1}{N}\mathsf{Var}\big[\zeta_{1}(N)\big]\to 0  $ and $ \dfrac{1}{N^{2}} \sum\limits_{1\leq i < j\leq N}\mathsf{Cov}\big[\zeta_{i}(N)\,,\,\zeta_{j}(N)\big]\to 0 $ as $ N\rightarrow \infty$.



In Step 1, Bulinski and Dimitrov adopt the setting of $ N=2 $ and replace the cumulative distribution functions $ p_{N,x}(u)$ and $F_{N,x}(u) $ with $p_{2,x}(w)$ and $F_{2,x}(w) $ to facilitate the proof. 
In particular, they change the variable from $ u $ to $ w={u}/{(N-1)}$ and thus, the radius $r_{N}(u) $ defined in \cite[Eq.~(3.7)]{Bulinski19}, i.e.\ $ r_{N}(u):=\bigg(\dfrac{u}{V_{d}\tilde{\gamma}(N-1)}\bigg)^{1/d} $ with $ \tilde{\gamma}:=e^{\gamma} $, becomes $ r_{N}(u)=r_{N}(w(N-1))=\Big(\dfrac{w}{V_{d}\tilde{\gamma}}\Big)^{1/d}=r_{2}(w) $.
%\begin{align}
%r_{N}(u)=r_{N}(w(N-1))=r_{N}\Big(\dfrac{w}{V_{d}\tilde{\gamma}}\Big)^{1/d}=r_{2}(w)\,,
%\end{align}
Hence, the cumulative distribution functions are changed accordingly (see, for instance, \cite[Eq.~(4.4)]{Bulinski19}).

In Step 2, Bulinski and Dimitrov specify that $ N\geq 3 $ is required (see, for instance, the settings of \cite[Eq.~(4.6)]{Bulinski19}). However, they apply the results of Step 1 in \cite[pp.~31--34]{Bulinski19}, which were based on the setting $ N=2 $, e.g.\ $r_{N}(u)=r_{2}(w) $. In particular, in \cite[p.~37]{Bulinski19}, it is written: \textit{``The same reasoning as was used at Step 1 of the proof of Theorem 2.8 leads \ldots''}.

Note that we require $ N\geq 2 $, as we use the Euclidean distance of two different points $ \rho(x,y) $ to define the nearest neighbor. In other words, we need at least two points to define a ball $ B(x,r) :=\{y\in\mathbb{R}^{d}: \rho(x,y)<r\}$. 
Consider a ball centered at $ x $ with radius $ r $. We can adopt the setting of $ N=2 $ for   Step 1, where we only consider the variance of the single random variable $ y $ or $\zeta_{1}(N) $. But we need at least two points $ y $ and $ z $ to calculate the covariance in Step 2, i.e. we need $ N\geq 3 $.

We improve the proof of Step 2 by replacing the setting $ N=2 $ with the requirement $ N\geq 3 $.
% set $ N=2 $ and replace $ p_{N,x}(w)$ and $F_{N,x}(w) $ with $p_{2,x}(w)$ and $F_{2,x}(w) $ to facilitate the proof of Theorem 2.8. In specific, on page 37, it is written \textit{``The same reasoning as was used at Step 1 of the proof of Theorem 2.8 leads......''}. 
%Note that the investigation of $ J_{2}(N,x) $ in Step 1 is based on the case $ N= 2 $. 
To avoid confusion, we use $ J_{2}^{y}(N,x) $ to replace $ J_{2}(N,x) $ for the case when  $ N\geq 3 $. We define $ \tilde{u}={2u}/{(N-1)}$ and the change of variables leads to the following remarkable results:
\begin{enumerate}[(i)]
%\item  $  \tilde{u}=\dfrac{2u}{N-1}\in \Big[\frac{2}{\sqrt{N-1}},\infty\Big) \quad \Longleftrightarrow \quad u\in[\sqrt{N-1},\infty)\,,\quad N\geq 3$
\item %%$ \tilde{u}=\dfrac{2u}{N-1} $ and $ \tilde{u}\in \Big[\frac{2}{\sqrt{N-1}},\infty\Big) $, i.e.
%%\begin{eqnarray}\label{tilde u domain}
$
u\in[\sqrt{N-1},\infty)\quad \Longleftrightarrow \quad \tilde{u}=\dfrac{2u}{N-1}\in \Big[\frac{2}{\sqrt{N-1}},\infty\Big)\,,\quad N\geq 3\,,
$
%\end{eqnarray}
\item %%$  r_{N}(u)=r_{3}(\tilde{u})$, i.e.
%%\begin{eqnarray}\label{rN,u=rN,tilde u}
$
r_{N}(u)=\bigg[\dfrac{u}{(N-1)V_{d}\tilde{\gamma}}\bigg]^{1/d} = \bigg[\dfrac{\tilde{u}}{2V_{d}\tilde{\gamma}}\bigg]^{1/d}=r_{3}(\tilde{u})
$\,,
%%\end{eqnarray}
%$ r_{N}(u)=\bigg[\dfrac{u}{(N-1)V_{d}\tilde{\gamma}}\bigg]^{1/d} = \bigg[\dfrac{\tilde{u}}{2V_{d}\tilde{\gamma}}\bigg]^{1/d}=r_{3}(\tilde{u}) $
%\item $ r_{N}(u)=\bigg[\dfrac{u}{(N-1)V_{d}\tilde{\gamma}}\bigg]^{1/d} = \bigg[\dfrac{\tilde{u}}{V_{d}\tilde{\gamma}}\bigg]^{1/d}=r_{2}(\tilde{u})=  2^{1/d}\bigg[\dfrac{\tilde{u}}{2V_{d}\tilde{\gamma}}\bigg]^{1/d}=2^{1/d}r_{3}(\tilde{u}) $
%Therefore, we have $r_{3}(\tilde{u})=\dfrac{r_{2}(\tilde{u})}{2^{1/d}}\leq r_{2}(\tilde{u}) $, since $ 2^{1/d}\geq 1 $
\item %%$\mathsf{P}_{N,x}(u) =\mathsf{P}_{3,x}(\tilde{u}) $, i.e.
%%\begin{eqnarray}\label{PN,u=PN,tilde u}
$\mathsf{P}_{N,x}(u) =\int_{B(x, r_{N}(u))}f(\xi)d\xi=\int_{B(x, r_{3}(\tilde{u}))}f(\xi)d\xi =\mathsf{P}_{3,x}(\tilde{u}) 
%%\end{eqnarray}
$\,,
%\item $ \mathsf{P}_{N,x}(u)=\int_{B(x, r_{N}(u))}f(\xi)d\xi =\int_{B(x, r_{3}(\tilde{u}))}f(\xi)d\xi =\mathsf{P}_{3,x}(\tilde{u})$, with $  r_{3}(\tilde{u}) $ defined in (ii)
%\item $ \mathsf{P}_{N,x}(u)=\int_{B(x, r_{N}(u))}f(\xi)d\xi =\int_{B(x, r_{2}(\tilde{u}))}f(\xi)d\xi =\mathsf{P}_{2,x}(\tilde{u})\geq \mathsf{P}_{3,x}(\tilde{u})=\int_{B(x, r_{3}(\tilde{u}))}f(\xi)d\xi $, with $  r_{N}(u) $ defined in (ii)
%Therefore, $ 0\leq 1- \mathsf{P}_{2,x}(\tilde{u}) \leq 1- \mathsf{P}_{3,x}(\tilde{u}) \leq 1$
\item $ F^{y}_{N,x}(u)=F^{y}_{3,x}(\tilde{u}) $\,.%%, i.e.
%%\begin{eqnarray}\label{F y N=3,x tilde u}
%%F^{y}_{N,x}(u)&=&1- \mathds{1}\Big[\rho(x, y)>r_{N}(u)\Big] \Big[1-\mathsf{P}_{N,x}(u)\Big]^{N-2} \,,\qquad N\geq 3\nonumber\\ 
%%&=&1- \mathds{1}\Big[\rho(x, y)>r_{3}(\tilde{u})\Big] \Big[1-\mathsf{P}_{3,x}(\tilde{u})\Big] \nonumber\\ 
%%&=& F^{y}_{3,x}(\tilde{u})
%%\end{eqnarray}
\end{enumerate} 
Recall from the last equation in \cite[p.~32]{Bulinski19} that the integral $ \int_{[e,\infty)} $ is split into $ \int_{[e,\sqrt{N-1}]}+\int_{(\sqrt{N-1},\infty)} $: %as follows.
\begin{eqnarray}
I^{y}_{2}(N,x)%%&=&\Bigg(\int\limits_{[e,\sqrt{N-1}]}+\int\limits_{(\sqrt{N-1},\infty)}\Bigg)[1-F^{y}_{N,x}(u)] \dfrac{(\log u)^{3}}{u}\bigg[\log(\log u)+\frac{1}{4}\bigg]du \nonumber\\
&=& J^{y}_{1}(N,x)+J^{y}_{2}(N,x) ,\nonumber
\end{eqnarray}
with
\begin{eqnarray}
J^{y}_{1}(N,x)&=&\int_{[e,\sqrt{N-1}]}[1-F^{y}_{N,x}(u)] \dfrac{\log u}{u}\bigg[\log(\log u)+\frac{1}{2}\bigg]du\,, \nonumber \\
J^{y}_{2}(N,x)&=&\int_{(\sqrt{N-1},\infty)}[1-F^{y}_{N,x}(u)] \dfrac{\log u}{u}\bigg[\log(\log u)+\frac{1}{2}\bigg]du\,. \label{J_2 y}
\end{eqnarray}
We substitute  $ \tilde{u}=\dfrac{2u}{N-1} $ into \cite[Eq.~(4.4)]{Bulinski19} and split the integrals as follows:
%%the integral $ (\sqrt{N-1},\infty) $ becomes $(\frac{2}{\sqrt{N-1}},\infty)  $ , which can be split as follows
%%\begin{eqnarray}\label{intergal split J_2 2.2 y}
%%\bigg(\dfrac{2}{\sqrt{N-1}},\infty\bigg)= \bigg(\frac{2}{\sqrt{N-1}},e\bigg]\cup (e,\infty)  
%%\end{eqnarray}
%%The integral split requires $ \frac{2}{\sqrt{N-1}}\leq e $ or in other words $ N\geq 1+ \big(\frac{2}{e}\big)^{2} $. This requirement is clearly satisfied for any $ N\geq 2 $. Substituting Eq.(\ref{intergal split J_2 2.2 y}) the results in (i)-(iv) into Eq.(\ref{J_2 2.2 1 y}), we obtain
\begin{align}
J^{y}_{2}(N,x)\leq &  
\dfrac{(3\widetilde{\gamma})^{\varepsilon}}{m_{f}^{\varepsilon}(x,R_{2})(N-1)^{\varepsilon/2}}\Bigg(
\int\limits_{(\frac{2}{\sqrt{N-1}},e^{1+\Delta}]} +\int\limits_{(e^{1+\Delta},\infty)}\Bigg)\nonumber\\
 & \ \dfrac{\log \dfrac{(N-1)\tilde{u}}{2}\bigg[\log\log\dfrac{(N-1)\tilde{u}}{2}+\dfrac{1}{2}\bigg]}{\tilde{u}}\big[1-\mathsf{P}_{3,x}(\tilde{u})\big]\,{d\tilde{u}}\,.   \label{J_2 2.2 1 y eq2}
\end{align}
Recall from ($ \mathrm{iv} $) %in Eq.(\ref{F y N=3,x tilde u}), 
that we have %$1-F^{y}_{N,x}(u)=1-F^{y}_{3,x}(\tilde{u})  $ and
\begin{eqnarray*}%\label{1-F y N=3,x tilde u}
 1- F^{y}_{3,x}(\tilde{u})&=& \mathds{1}\Big[\rho(x, y)>r_{3}(\tilde{u})\Big] \Big[1-\mathsf{P}_{3,x}(\tilde{u})\Big] \nonumber\\ 
 &=& \begin{cases}
     1-\mathsf{P}_{3,x}(\tilde{u}), & \qquad \rho(x, y)>r_{3}(\tilde{u})\,, \\[2\jot]
    0, &\qquad  \rho(y,x)\leq r_{3}(\tilde{u})\,.
  \end{cases} 
\end{eqnarray*}
Therefore, when $ \rho(y,x)\leq r_{3}(\tilde{u}) $, we have $ F^{y}_{3,x}(\tilde{u})=F^{y}_{N,x}(u)=1 $. From equation (\ref{J_2 y}), we obtain that $J^{y}_{2}(N,x) $ is equal to zero.
Obviously, %%$J^{y}_{2}(N,x)<\infty  $ holds in this case, and 
the boundedness of $J^{y}_{2}(N,x)$ 
is proved in this case. Let us focus on the case $ \rho(x, y)>r_{3}(\tilde{u}) $, and then we have 
\begin{eqnarray}\label{F_3=P_3}
F^{y}_{3,x}(\tilde{u})=\mathsf{P}_{3,x}(\tilde{u})\,.
\end{eqnarray}
%%Note that the integral $ \int_{(\frac{2}{\sqrt{N-1}},e]} $ requires $ \frac{2}{\sqrt{N-1}}\leq e $, and it is satisfied for any $ N\geq 2 $.\\[.3cm]
From equations \eqref{J_2 2.2 1 y eq2} to \eqref{F_3=P_3}, we obtain a refined $ J^{y}_{2}(N,x) $, which can replace $ J_{2}(N,x) $ in \cite[Eq.~(4.4)]{Bulinski19}. Analogously, we can amend the related formulas in Step~2 by replacing $ w $, $ \mathsf{P}_{2,x}(w) $, $ F_{2,x}(w) $  with $ \tilde{u}/2 $, $ \mathsf{P}_{3,x}(\tilde{u}) $, $ F_{3,x}(\tilde{u}) $, respectively.  After this amendment, %our new formulas will share similarly forms to the ones in \text{Step 1}, where $ N=2 $. 
the main results of Bulinski and Dimitrov remain unchanged. This work provides a rigorous correction to the proof of Theorem~\ref{result II} (Theorem~2.8 in the original article~\cite{Bulinski19}). The refined methodology offers researchers a more precise framework for applying similar proof techniques to related problems. Notably, this improved approach has already demonstrated its utility through successful application and extension in the recent work~\cite{LST}.

 %Some extensions can also be found in Leonenko, Sun, and Taufer (2024).





%%The detailed investigation, analogously to the analysis in the \text{Section \ref{subparagraph J_2 2.2}}, which is based on $ N=2 $, can be obtained by defining $ \tilde{u}={2u}/{(N-1)}$ and analysing its associated change of variables as follows.



%%===============================================================%%
%% ACKNOWLEDGEMENTS                                              %%
%% Acknowledgements can be added here.                           %%
%%===============================================================%%
\section*{Acknowledgements}
%%===============================================================%%
 Yu Sun gratefully acknowledges the support of PRIN 2022 ``Prediction and causal inference on the tail index for
policy decisions'' -- CUP E53D23006380006.



%%===============================================================%%
%% REFERENCES                                                    %%
%% References should be provided in bibtex file.                 %%
%% We suggest using MR Lookup for finding bibtex entries.        %%
%%===============================================================%%
\bibliography{references}
%\begin{thebibliography}{10}
%\bibitem{Bulinski19} Bulinski, A., and Dimitrov, D. (2019). Statistical Estimation of Shannon Entropy. Acta Mathematica Sinica, English Series, 35(1), 17-46.
%
%
%\bibitem{Kozachenko87} Kozachenko, L. F. and Leonenko, N. N. (1987). Sample estimate of the entropy of a random vector. Problems of Information Transmission, 23(1), p.95-101.
%
%\bibitem{LST} Leonenko, N., Sun, Y., and Taufer, E. (2024). Varentropy estimation via Nearest Neighbor Graphs. ArXiv,
%2402.09374, p.1-51.
%
%\end{thebibliography}


%%===============================================================%%
%% APPENDICES	                                                 %%
%% Appendices can be added here.                                 %%
%%===============================================================%%
%\normalsize
%\begin{appendices}
%\section{Some Appendix}
%Appendices should be placed at the end of the manuscript, after the references list. 
%\end{appendices}
%%===============================================================%%

%\end{linenumbers}
\end{document} 

