@Article{cmc.2020.011816,
  AUTHOR = {Ehab Mahmoud Mohamed and Sherief Hashima and Kohei Hatano and Hani Kasban and Mohamed Rihan},
  TITLE = {Millimeter-Wave Concurrent Beamforming: A Multi-Player Multi-Armed Bandit Approach},
  JOURNAL = {Computers, Materials \& Continua},
  VOLUME = {65},
  NUMBER = {3},
  YEAR = {2020},
  PAGES = {1987--2007},
  URL = {http://www.techscience.com/cmc/v65n3/40151},
  ISSN = {1546-2226},
  DOI = {10.32604/cmc.2020.011816},
  ABSTRACT = {Communication in the millimeter-wave (mmWave) band, i.e., 30--300 GHz, is characterized by short-range transmission and the use of antenna beamforming (BF). Thus, multiple mmWave access points (APs) must be installed to fully cover a target environment with gigabits per second (Gbps) connectivity. However, inter-beam interference prevents the established concurrent links from maximizing their sum rates. In this paper, a reinforcement learning (RL) approach is proposed for enabling mmWave concurrent transmissions by finding beam directions that maximize the long-term average sum rates of the concurrent links. Specifically, the problem is formulated as a multi-player multi-armed bandit (MAB) problem, where mmWave APs act as players aiming to maximize their achievable rewards, i.e., data rates, and the arms to play are the available beam directions. In this setup, a selfish concurrent multi-player MAB strategy is advocated. Four MAB algorithms, namely $\epsilon$-greedy, upper confidence bound (UCB), Thompson sampling (TS), and the exponential weight algorithm for exploration and exploitation (EXP3), are examined by employing them in each AP to selfishly enhance its beam selection based only on its own previous observations. After a few rounds of interaction, the mmWave APs learn to select concurrent beams that enhance overall system performance. The proposed MAB-based mmWave concurrent BF achieves performance comparable to the optimal solution.}
}