我在将矩阵.row(i)
作为左值引用传递时遇到问题。我认为这应该可以用来解决Eigen::Ref<>
,但我遇到了困难。
这篇文章底部的大代码块目前可以正常工作。此代码的概要如下所示:
100% fbatch
47.1% void de_casteljau
44.8% Eigen::internal::dense_assignment_loop::run
4.9% libsystem_malloc.dylib`_free
1.1% libsystem_malloc.dylib`_nanov2_free
< 1% DYLD-STUB$$free
< 1% Eigen::DenseStorage::resize
我相信这Eigen::internal::dense_assignment_loop::run
是由代码块最后一个例程中的 for 循环中的临时和复制引起的。(如果我错了,任何优化此代码的帮助都将不胜感激。)
(我很可能是错的,因为这条线似乎不应该这么糟糕。 tmp
并不大(3x1 双倍),但复制/分配确实发生了很多。我专注于这个副本,因为我没有看到任何其他fbatch
看起来会触发的东西Eigen::internal::dense_assignment_loop::run
)
for ( index_type i = 0; i <= n; ++i )
{
de_casteljau( tmp, B_v[ i ], v );
temp_cp.row( i ) = tmp;
}
我想通过将此代码更改为来消除临时和复制
for ( index_type i = 0; i <= n; ++i )
{
de_casteljau( temp_cp.row( i ), B_v[ i ], v );
}
但是,这需要.row( i )
作为左值引用传递。clang 抱怨这样的事情......
error: no matching function for call to 'de_casteljau'
note: candidate function [with Derived1 = Eigen::Block<Eigen::Matrix<double, -1, 3>, 1, 3>, Derived2 = Eigen::Map<Eigen::Matrix<double, -1, 3>, 0, Eigen::Stride<1, -1>>] not viable: expects an lvalue for 1st argument
27 | void de_casteljau(Eigen::MatrixBase<Derived1> &p, const Eigen::MatrixBase<Derived2> &cp, const typename Derived2::Scalar &t)
在简单应用时Eigen::Ref
,我尝试将第一个函数声明从:
template < typename Derived1, typename Derived2 >
void de_casteljau( Eigen::MatrixBase < Derived1 > & p, const Eigen::MatrixBase < Derived2 > & cp, const typename Derived2::Scalar & t )
到
template < typename Derived1, typename Derived2 >
void de_casteljau( Eigen::Ref < Eigen::MatrixBase < Derived1 > > p, const Eigen::MatrixBase < Derived2 > & cp, const typename Derived2::Scalar & t )
这给了我一个错误,例如:
error: no matching function for call to 'de_casteljau'
note: candidate template ignored: could not match 'Ref' against 'Matrix'
27 | void de_casteljau(Eigen::Ref < Eigen::MatrixBase<Derived1> > p, const Eigen::MatrixBase<Derived2> &cp, const typename Derived2::Scalar &t)
此错误在程序中出现得非常早。理想情况下,我希望以一种de_casteljau
不需要修改其许多调用站点的方式修复它。
template < typename Derived1, typename Derived2 >
void de_casteljau( Eigen::MatrixBase < Derived1 > & p, const Eigen::MatrixBase < Derived2 > & cp, const typename Derived2::Scalar & t )
{
// do some checks on incoming matrix dimensions
assert( p.cols() == cp.cols() );
Eigen::Matrix < typename Derived2::Scalar, Eigen::Dynamic, Eigen::Dynamic > Q( cp );
typename Derived2::Scalar oneminust( 1 - t );
typename Derived2::Index k, i;
for ( k = 1; k < Q.rows(); ++k )
{
// Q.topRows( Q.rows() - k ) = oneminust * Q.topRows( Q.rows() - k ) + t * Q.middleRows( 1, Q.rows() - k );
// This for-loop is about 10% faster than equivalent one-liner above.
for ( i = 0; i < Q.rows() - k; ++i )
{
Q.row( i ) = oneminust * Q.row( i ) + t * Q.row( i + 1 );
}
}
p = Q.row( 0 );
}
typedef Eigen::Matrix < data_type, 1, dim__ > point_type;
typedef typename point_type::Index index_type;
typedef Eigen::Map < Eigen::Matrix < data_type, Eigen::Dynamic, dim__ >,
Eigen::Unaligned,
Eigen::Stride < 1, dim__ > > control_point_matrix_type;
typedef Eigen::Map < Eigen::Matrix<data_type, Eigen::Dynamic, dim__ >,
Eigen::Unaligned,
Eigen::Stride < 1, Eigen::Dynamic > > v_dir_control_point_matrix_type;
typedef std::vector < data_type > control_point_container;
typedef std::vector < control_point_matrix_type > u_control_point_matrix_container;
typedef std::vector < v_dir_control_point_matrix_type > v_control_point_matrix_container;
control_point_container point_data; /** raw control points stored in vector. The order is {x,y...}_(0,0) {x,y...}_(1,0) ... {x,y...}_(n,0) {x,y...}_(0,1) {x,y...}_(1,1) ... {x,y...}_(n,1) ... {x,y...}_(n,m) */
u_control_point_matrix_container B_u; /** vector of u-direction, i.e. direction where v is constant, curve control points in point_data */
v_control_point_matrix_container B_v; /** vector of v-direction, i.e. direction where u is constant, curve control points in point_data */
static void resize( u_control_point_matrix_container &uvec, v_control_point_matrix_container &vvec, control_point_container &data, const index_type &u_dim, const index_type &v_dim )
{
// allocate the control points
data.resize( dim__ * ( u_dim + 1 ) * ( v_dim + 1 ) );
// set the B_u and B_v maps
set_Bs( uvec, vvec, data, u_dim, v_dim);
}
static void set_Bs( u_control_point_matrix_container &uvec, v_control_point_matrix_container &vvec, control_point_container &data, index_type n, index_type m )
{
// allocate vectors of control point maps
uvec.resize( m + 1, control_point_matrix_type( nullptr, m + 1, dim__, Eigen::Stride < 1, dim__ > () ) );
for ( index_type j = 0; j <= m; ++j )
{
new ( uvec.data() + j ) control_point_matrix_type( data.data() + j * ( n + 1 ) * dim__, n + 1, dim__, Eigen::Stride < 1, dim__ > () ) ;
}
vvec.resize( n + 1, v_dir_control_point_matrix_type( nullptr, n + 1, dim__, Eigen::Stride < 1, Eigen::Dynamic > ( 1, ( n + 1 ) * dim__ ) ) );
for ( index_type i = 0; i <= n; ++i )
{
new ( vvec.data() + i ) v_dir_control_point_matrix_type( data.data() + i * dim__, m + 1, dim__, Eigen::Stride < 1, Eigen::Dynamic > ( 1, ( n + 1 ) * dim__ ) );
}
}
void set_Bs( index_type n, index_type m )
{
set_Bs( B_u, B_v, point_data, n, m );
}
void fbatch( index_type i0, index_type j0, index_type nu, index_type nv, const std::vector < data_type > &uvec, const std::vector < data_type > &vvec, std::vector < std::vector < point_type > > &ptmat ) const
{
point_type ans, tmp;
Eigen::Matrix < data_type, Eigen::Dynamic, dim__ > temp_cp;
index_type n( degree_u() ), m( degree_v() );
// check to make sure have valid curve
assert( n >= 0 );
assert( m >= 0 );
temp_cp.resize( n + 1, dim__ );
for ( index_type j = 0; j < nv; j++ )
{
data_type v = vvec[ j0 + j ];
// check to make sure given valid parametric value
assert( ( v >= 0 ) && ( v <= 1 ) );
// build the temporary control points
for ( index_type i = 0; i <= n; ++i )
{
de_casteljau( tmp, B_v[ i ], v );
temp_cp.row( i ) = tmp;
}
for (index_type i = 0; i < nu; i++ )
{
data_type u = uvec[ i0 + i ];
// check to make sure given valid parametric value
assert( ( u >= 0 ) && ( u <= 1 ) );
de_casteljau( ptmat[ i + i0 ][ j0 + j ], temp_cp, u );
}
}
}