Third solution; it may be the best in terms of speed and memory use.
% Replacement of [n,i] = histc(x,edges,dim), built on histcounts + accumarray.
%
% Expects in the workspace:
%   x     - array of values to bin
%   edges - vector of bin edges (row or column)
%   dim   - dimension of x along which the counts are accumulated
% Produces:
%   n - counts, sized like x except that size(n,dim) == length(edges)
%   i - bin index of every element of x (0 for elements that are not counted)

% Appending Inf adds a final bin starting at edges(end), so that values equal
% to the last edge get their own bin index, as with histc. edges(:) makes the
% concatenation work for both row and column edge vectors.
[~,~,i] = histcounts(x, [edges(:); Inf]);
% The appended bin is [edges(end),Inf] and therefore also catches everything
% above the last edge; histc's last bin holds only x == edges(end), so those
% elements are marked as unbinned here.
i(x > edges(end)) = 0;

s = size(x);
s(end+1:dim) = 1; % pad with trailing singletons so that dim > ndims(x) is valid
s = uint32(s);
nbins = uint32(length(edges));
p1 = uint32(prod(s(1:dim-1)));   % number of elements spanned by the dims before dim
p2 = uint32(prod(s(dim+1:end))); % number of elements spanned by the dims after dim
if p1 == 1 % happens for dim==1
    % No leading dimensions: linear index into n is bin + nbins*(slice-1).
    k = reshape(uint32(i),[s(dim),p2]);
    l = nbins*(0:p2-1);
    ilin = l + k;
else
    % General case: linear index into n is
    %   leadingPos + (bin-1)*p1 + (slice-1)*p1*nbins.
    j = reshape(1:p1,[p1,1,1]);
    % uint32(i-1) saturates to 0 where i == 0; those entries are discarded by
    % the i~=0 mask below, so they never reach accumarray.
    k = reshape(uint32(i-1)*p1,[p1,s(dim),p2]);
    l = reshape((p1*nbins)*(0:p2-1),[1,1,p2]);
    ilin = (j + l) + k;
end
ilin = ilin(:);
s(dim) = nbins; % from here on s is the size of the output n
% Count only elements that fell into a bin (i == 0 means unbinned).
n = accumarray(ilin(i~=0),1,[prod(s),1]);
n = reshape(n,s);
EDIT: slightly improved the code by (1) skipping unnecessary calculations for dim=1 and (2) casting the indices to UINT32.