I was able to get a 10% boost using hacks similar to this. I bypassed the fc library and most of the C++ code entirely and just used the secp256k1-zkp code directly (I even modified it a little bit). This was actually an initial attempt to reimplement the mining algorithm at a lower level, to see how much work would be necessary to write a GPU miner (hint: a lot!).
The code I wrote (a while ago) is so messy and ugly that I feel embarrassed to post the full thing, and the project is not important enough to me to spend time tidying up the code and making it better. But I will post a snippet of it for anyone who wants to get an idea of what I did:
/**
 * Searches the nonces in [start_nonce, start_nonce + nonce_limit) for a proof of work whose
 * work hash scores at least `target_bits` according to get_target_bits(), calling the
 * secp256k1-zkp primitives directly.
 *
 * @param w            private key used to sign every candidate; its public key is reported as the worker
 * @param block_id     block the proof of work refers to; its first 20 bytes seed the hash buffer
 * @param target_bits  minimum get_target_bits() score the work hash must reach
 * @param start_nonce  first nonce to try
 * @param nonce_limit  number of consecutive nonces to try
 * @return the pow_operation built from the first nonce that reaches the target
 *
 * If no nonce in the range reaches the target, the function ends in
 * FC_ASSERT(false, ...) instead of returning.
 */
pow_operation mine_pow2( const fc::ecc::private_key& w, const block_id_type& block_id, uint8_t target_bits, uint64_t start_nonce, uint32_t nonce_limit )
{
    auto ctx = get_context();
    secp256k1_sha256_t sha;

    // Keep the secret alive for the whole function. If get_secret() returns by value
    // (NOTE(review): believed to be the case in fc -- confirm), taking .data() straight
    // from the temporary would leave seckey dangling.
    const auto& secret = w.get_secret();
    auto seckey = reinterpret_cast<const unsigned char*>(secret.data());

    // Seed buffer: SHA-256 over the first 20 bytes of the block id.
    unsigned char hash[32];
    secp256k1_sha256_initialize(&sha);
    secp256k1_sha256_write(&sha, reinterpret_cast<const unsigned char*>(block_id.data()), 20);
    secp256k1_sha256_finalize(&sha, hash);

    unsigned char sig65[65];
    int recid = 0;
    unsigned char pubkey[33];
    int pubkeylen;

    // Generate the signing nonce once, up front, retrying with an increasing attempt
    // counter until the generator reports success. It is reused for every candidate below.
    unsigned char signing_nonce[32];
    int count = 0;
    while (true) {
        if( secp256k1_nonce_function_default(signing_nonce, hash, seckey, count, nullptr) )
            break;
        ++count;
    }

    unsigned char input[32];
    unsigned char sig_hash[32];
    unsigned char work[32];
    auto nonce_end = start_nonce + nonce_limit;
    for (auto n = start_nonce; n < nonce_end; ++n)
    {
        // Main inner loop

        // Overwrite the first bytes of the seed buffer with the nonce in native byte order.
        // Copying byte by byte avoids storing a uint64_t through a pointer into an
        // unsigned char array (strict-aliasing / alignment violation).
        const unsigned char* nonce_bytes = reinterpret_cast<const unsigned char*>(&n);
        for (unsigned i = 0; i < sizeof(n); ++i)
            hash[i] = nonce_bytes[i];

        // input = SHA-256(seed buffer with nonce)
        secp256k1_sha256_initialize(&sha);
        secp256k1_sha256_write(&sha, hash, 32);
        secp256k1_sha256_finalize(&sha, input);

        // Call custom ECDSA signing function I created in secp256k1-zkp to avoid unnecessary nonce generation.
        if( !secp256k1_ecdsa_sign_compact2(ctx, input, &sig65[1], seckey, signing_nonce, &recid) )
            continue;
        // Header byte of the 65-byte signature: recovery id offset by 31.
        sig65[0] = 31 + recid;

        // sig_hash = SHA-256(full 65-byte signature)
        secp256k1_sha256_initialize(&sha);
        secp256k1_sha256_write(&sha, sig65, 65);
        secp256k1_sha256_finalize(&sha, sig_hash);

        // Recover a public key from the signature, using the hash of the signature itself
        // as the message. (The literal 1 presumably requests the 33-byte compressed form.)
        if( !secp256k1_ecdsa_recover_compact(ctx, sig_hash, &sig65[1], pubkey, &pubkeylen, 1, recid) )
            continue;

        // work = SHA-256(recovered public key); this is the value scored against the target.
        secp256k1_sha256_initialize(&sha);
        secp256k1_sha256_write(&sha, pubkey, 33);
        secp256k1_sha256_finalize(&sha, work);

        auto bits = get_target_bits(work);
        if( bits >= target_bits ) {
            fc::array<unsigned char, 65> sig;
            for (auto i = 0; i < 65; ++i)
                sig.at(i) = sig65[i];

            pow_operation pow_op;
            pow_op.block_id = block_id;
            pow_op.nonce = n;
            pow_op.work.worker = w.get_public_key();
            pow_op.work.input = fc::sha256(reinterpret_cast<const char*>(input), 32);
            pow_op.work.signature = static_cast<fc::ecc::compact_signature>(sig);
            pow_op.work.work = fc::sha256(reinterpret_cast<const char*>(work), 32);

            // Cross-check the hand-built result against validate_pow and report the outcome.
            if( validate_pow(pow_op) )
                std::cout << "The POW solved using mine_pow2 is valid!" << std::endl;
            else
                std::cout << "The POW solve using mine_pow2 is not valid." << std::endl;
            return pow_op;
        }
    }
    FC_ASSERT(false, "Miner could not find valid nonce");
}
/**
 * Counts the leading zero bits of a hash, looking only at its first 8 bytes.
 *
 * Bytes are scanned in memory order and, within each byte, from the most
 * significant bit down. Scanning stops at the first set bit.
 *
 * @param hash32 pointer to the hash; only the first 8 bytes are read
 * @return number of zero bits before the first set bit, capped at 64 -- a result
 *         of 64 means the first 8 bytes were all zero, so targets above 64 bits
 *         cannot be distinguished
 */
uint8_t get_target_bits(const unsigned char* hash32) {
    uint8_t zero_bits = 0;
    for (uint8_t idx = 0; idx < 8; ++idx) {
        const unsigned char current = hash32[idx];
        if (current == 0) {
            // Whole byte is zero: all eight bits count, keep scanning.
            zero_bits += 8;
            continue;
        }
        // Walk a single-bit mask down from the top bit until it hits a set bit.
        for (unsigned char mask = 0x80; (current & mask) == 0; mask >>= 1) {
            ++zero_bits;
        }
        return zero_bits;
    }
    return zero_bits;
}