Страницы

Поиск по вопросам

воскресенье, 15 декабря 2019 г.

C# Хеш от строки, нужен алгоритм

#c_sharp #хеширование


В C# у типа string есть метод GetHashCode() возвращающий хеш значение типа int.
Этот метод на разных версия платформы .Net Framework возвращает разное значение.
Для моей задачи требуется, чтобы возвращаемое значение было одинаковое на разных
платформах. Нужен алгоритм вычисления хеша строки возвращающий int. Может кто-нибудь
предложить такой алгоритм?

Уточнения:


Без использования unsafe кода.
Весь диапазон int (положительные и отрицательные числа)
Входная строка небольшая.

    


Ответы

Ответ 1



Можно взять любую реализацию String.GetHashCode и использовать ее, вот переписанные без unsafe 2 версии. Можете убедиться, что StringHashCode40 возвращает тоже самое, что и нативный GetHashCode: public static int StringHashCode20(string value) { int num = 352654597; int num2 = num; for (int i = 0; i < value.Length; i += 4) { int ptr0 = value[i] << 16; if (i + 1 < value.Length) ptr0 |= value[i + 1]; num = (num << 5) + num + (num >> 27) ^ ptr0; if (i + 2 < value.Length) { int ptr1 = value[i + 2] << 16; if (i + 3 < value.Length) ptr1 |= value[i + 3]; num2 = (num2 << 5) + num2 + (num2 >> 27) ^ ptr1; } } return num + num2 * 1566083941; } public static int StringHashCode40(string value) { int num = 5381; int num2 = num; for (int i = 0; i < value.Length; i += 2) { num = (((num << 5) + num) ^ value[i]); if (i + 1 < value.Length) num2 = (((num2 << 5) + num2) ^ value[i + 1]); } return num + num2 * 1566083941; } static void Main(string[] args) { string value = "123"; Console.WriteLine(StringHashCode20(value)); Console.WriteLine(StringHashCode40(value)); Console.WriteLine(value.GetHashCode()); }

Ответ 2



Как-то завалялся у меня код множества функций для вычисления хэша для строк (правда, код на Си, а не шарпе, но при желании его (надеюсь) легко можно адаптировать) #include /* https://ru.wikipedia.org/wiki/%D0%A5%D0%B5%D1%88%D0%B8%D1%80%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5 https://ru.wikipedia.org/wiki/Хеширование General Purpose Hash Function Algorithms http://www.partow.net/programming/hashfunctions/#RSHashFunction Available Hash Functions The General Hash Functions Library has the following mix of additive and rotative general purpose string hashing algorithms. The following algorithms vary in usefulness and functionality and are mainly intended as an example for learning how hash functions operate and what they basically look like in code form. RS Hash Function A simple hash function from Robert Sedgwicks Algorithms in C book. I've added some simple optimizations to the algorithm in order to speed up its hashing process. */ unsigned int RSHash (char *str, unsigned int len) { unsigned int b = 378551; unsigned int a = 63689; unsigned int hash = 0; unsigned int i = 0; for (i = 0; i < len; str++, i++) { hash = hash * a + (unsigned char)(*str); a = a * b; } return hash; } /* JS Hash Function A bitwise hash function written by Justin Sobel */ unsigned int JSHash (char *str, unsigned int len) { unsigned int hash = 1315423911; unsigned int i = 0; for (i = 0; i < len; str++, i++) { hash ^= ((hash << 5) + (unsigned char)(*str) + (hash >> 2)); } return hash; } /* PJW Hash Function This hash algorithm is based on work by Peter J. Weinberger of AT&T Bell Labs. The book Compilers (Principles, Techniques and Tools) by Aho, Sethi and Ulman, recommends the use of hash functions that employ the hashing methodology found in this particular algorithm. */ unsigned int PJWHash (char *str, unsigned int len) { unsigned int BitsInUnsignedInt = (unsigned int) (sizeof (unsigned int) * 8); unsigned int ThreeQuarters = (unsigned int) ((BitsInUnsignedInt * 3) / 4); unsigned int OneEighth = (unsigned int) (BitsInUnsignedInt / 8); unsigned int HighBits = (unsigned int) (0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth); unsigned int hash = 0; unsigned int test = 0; unsigned int i = 0; for (i = 0; i < len; str++, i++) { hash = (hash << OneEighth) + (unsigned char)(*str); if ((test = hash & HighBits) != 0) { hash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits)); } } return hash; } /* ELF Hash Function Similar to the PJW Hash function, but tweaked for 32-bit processors. Its the hash function widely used on most UNIX systems. */ unsigned int ELFHash (char *str, unsigned int len) { unsigned int hash = 0; unsigned int x = 0; unsigned int i = 0; for (i = 0; i < len; str++, i++) { hash = (hash << 4) + (unsigned char)(*str); if ((x = hash & 0xF0000000L) != 0) { hash ^= (x >> 24); hash &= ~x; } } return hash; } /* BKDR Hash Function This hash function comes from Brian Kernighan and Dennis Ritchie's book "The C Programming Language". It is a simple hash function using a strange set of possible seeds which all constitute a pattern of 31....31...31 etc, it seems to be very similar to the DJB hash function. */ unsigned int BKDRHash (char *str, unsigned int len) { unsigned int seed = 131313; /* 31 131 1313 13131 131313 etc.. */ unsigned int hash = 0; unsigned int i = 0; for (i = 0; i < len; str++, i++) { hash = (hash * seed) + (unsigned char)(*str); } return hash; } /* SDBM Hash Function This is the algorithm of choice which is used in the open source SDBM project. The hash function seems to have a good over-all distribution for many different data sets. It seems to work well in situations where there is a high variance in the MSBs of the elements in a data set. */ unsigned int SDBMHash (char *str, unsigned int len) { unsigned int hash = 0; unsigned int i = 0; for (i = 0; i < len; str++, i++) { hash = (unsigned char)(*str) + (hash << 6) + (hash << 16) - hash; } return hash; } /* DJB Hash Function An algorithm produced by Professor Daniel J. Bernstein and shown first to the world on the usenet newsgroup comp.lang.c. It is one of the most efficient hash functions ever published. */ unsigned int DJBHash (char *str, unsigned int len) { unsigned int hash = 5381; unsigned int i = 0; for (i = 0; i < len; str++, i++) { hash = ((hash << 5) + hash) + (unsigned char)(*str); } return hash; } /* DEK Hash Function An algorithm proposed by Donald E. Knuth in The Art Of Computer Programming Volume 3, under the topic of sorting and search chapter 6.4. */ unsigned int DEKHash (char *str, unsigned int len) { unsigned int hash = len; unsigned int i = 0; for (i = 0; i < len; str++, i++) { hash = ((hash << 5) ^ (hash >> 27)) ^ (unsigned char)(*str); } return hash; } /* AP Hash Function An algorithm produced by me Arash Partow. I took ideas from all of the above hash functions making a hybrid rotative and additive hash function algorithm. There isn't any real mathematical analysis explaining why one should use this hash function instead of the others described above other than the fact that I tired to resemble the design as close as possible to a simple LFSR. An empirical result which demonstrated the distributive abilities of the hash algorithm was obtained using a hash-table with 100003 buckets, hashing The Project Gutenberg Etext of Webster's Unabridged Dictionary, the longest encountered chain length was 7, the average chain length was 2, the number of empty buckets was 4579. */ unsigned int APHash (char *str, unsigned int len) { unsigned int hash = 0; unsigned int i = 0; for (i = 0; i < len; str++, i++) { hash ^= ((i & 1) == 0) ? ((hash << 7) ^ (unsigned char)(*str) ^ (hash >> 3)) : (~((hash << 11) ^ (unsigned char)(*str) ^ (hash >> 5))); } return hash; } /* Еще несколько аналогичных функций из того же источника http://vak.ru/doku.php/proj/hash/sources LY Hash Function Congruential generator proposed by Leonid Yuriev. Multiplier constant suggested by M.Lavaux & F.Janssens. */ unsigned int LYHash (char *str, unsigned int len) { unsigned int hash = 0; unsigned int i = 0; for (i = 0; i < len; str++, i++) { hash = (hash * 1664525) + (unsigned char)(*str) + 1013904223; } return hash; } /* ROT13 Hash Function No multiplication, by Serge Vakulenko. Two shifts are converted by GCC 4 to a single rotation instruction. */ unsigned int ROT13Hash (char *str, unsigned int len) { unsigned int hash = 0; unsigned int i = 0; for (i = 0; i < len; str++, i++) { hash += (unsigned char)(*str); hash -= (hash << 13) | (hash >> 19); } return hash; } /* FAQ6 Hash Function From Bob Jenkins Hash Function FAQ: http://burtleburtle.net/bob/hash/hashfaq.html */ unsigned int bob_faq6_hash (char *str, unsigned int len) { unsigned int hash = 0; unsigned int i = 0; for (i = 0; i < len; str++, i++) { hash += (unsigned char)(*str); hash += (hash << 10); hash ^= (hash >> 6); } hash += (hash << 3); hash ^= (hash >> 11); hash += (hash << 15); return hash; } uint32_t crc_table [256] = { 0, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D }; unsigned int crc32 (char *buf, unsigned int len) { unsigned int crc; crc = 0xffffffff; while (len--) { crc = crc_table [(crc ^ (unsigned char)*buf++) & 0xff] ^ (crc >> 8); } return crc ^ 0xffffffff; } // и еще несколько популярных хэшей /* FNV hash The FNV hash, short for Fowler/Noll/Vo in honor of the creators, is a very powerful algorithm that, not surprisingly, follows the same lines as Bernstein's modified hash with carefully chosen constants. This algorithm has been used in many applications with wonderful results, and for its simplicity, the FNV hash should be one of the first hashes tried in an application. It is also recommended that the FNV website be visited for useful descriptions of how to modify the algorithm for various uses. */ uint32_t FNVHash (char *key, uint32_t len) { unsigned char *p = (unsigned char *)key; unsigned h = 2166136261; uint32_t i; for (i = 0; i < len; i++) h = (h * 16777619) ^ p[i]; return h; } // One-byte-at-a-time hash based on Murmur's mix // модифицирован для seed = 0 uint32_t MurmurOAAT (char *key, uint32_t len) { const uint8_t * data = (const uint8_t*)key; uint32_t h = 0; //seed; for(uint32_t i = 0; i < len; i++) { h ^= data[i]; h *= 0x5bd1e995; h ^= h >> 15; } return h; } /* https://github.com/jdavisp3/perp/blob/master/lasagna/hfunc/hfunc_kx17.c ** Peter Kankowski[*6] "x17" ** much like ghfa with init=0, mult=17 ** except: a constant of 32 is subtracted from each key byte before mixing ** ** theory: ** x17 hash function subtracts a space from each letter to cut off ** the control characters in the range 0x00..0x1F. If the hash keys ** are long and contain only Latin letters and numbers, the letters ** will be less frequently shifted out, and the overall number of ** collisions will be lower. You can even subtract 'A' when you know ** that the keys will be only English words. ** see Peter Kanowski's website http://www.strchr.com/hash_functions */ uint32_t hfunc_kx17 (const unsigned char *key, unsigned klen) { uint32_t h = 0; while(klen){ h = ((h << 4) + h) + (*key - ' '); ++key; --klen; } return h; } // Далее модификации тех же функций для C-строк // (массив символов, завершающийся нулем) uint32_t hfunc_kx17_s (char *str) { unsigned int hash = 0; unsigned char c; while ((c = *str++)) hash = ((hash << 4) + hash) + (c - ' '); return hash; } uint32_t crc32_s (char *s) { uint32_t crc = 0xffffffff; unsigned char c; while ((c = *s++)) crc = crc_table[(crc ^ c) & 0xff] ^ (crc >> 8); return crc ^ 0xffffffff; } uint32_t MurmurOAAT_s (char *key) { const uint8_t * data = (const uint8_t*)key; uint32_t h = 0; //seed; for(int i = 0; data[i]; i++) { h ^= data[i]; h *= 0x5bd1e995; h ^= h >> 15; } return h; } uint32_t FNVHash_s (char *key) { unsigned char *p = (unsigned char *)key; unsigned h = 2166136261; int i; for (i = 0; p[i]; i++) h = (h * 16777619) ^ p[i]; return h; } uint32_t RSHash_s (char *str) { unsigned int b = 378551; unsigned int a = 63689; unsigned int hash = 0; unsigned char c; while ((c = *str++)) { hash = hash * a + c; a = a * b; } return hash; } uint32_t JSHash_s (char *str) { unsigned int hash = 1315423911; unsigned char c; while ((c = *str++)) hash ^= ((hash << 5) + c + (hash >> 2)); return hash; } uint32_t PJWHash_s (char *str) { unsigned int BitsInUnsignedInt = (unsigned int) (sizeof (unsigned int) * 8); unsigned int ThreeQuarters = (unsigned int) ((BitsInUnsignedInt * 3) / 4); unsigned int OneEighth = (unsigned int) (BitsInUnsignedInt / 8); unsigned int HighBits = (unsigned int) (0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth); unsigned int hash = 0; unsigned int test = 0; unsigned char c; while ((c = *str++)) { hash = (hash << OneEighth) + c; if ((test = hash & HighBits) != 0) { hash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits)); } } return hash; } uint32_t ELFHash_s (char *str) { unsigned int hash = 0; unsigned int x = 0; unsigned char c; while ((c = *str++)) { hash = (hash << 4) + c; if ((x = hash & 0xF0000000L) != 0) { hash ^= (x >> 24); hash &= ~x; } } return hash; } uint32_t BKDRHash_s (char *str) { unsigned int seed = 131313; /* 31 131 1313 13131 131313 etc.. */ unsigned int hash = 0; unsigned char c; while ((c = *str++)) hash = (hash * seed) + c; return hash; } uint32_t SDBMHash_s (char *str) { unsigned int hash = 0; unsigned char c; while ((c = *str++)) hash = c + (hash << 6) + (hash << 16) - hash; return hash; } uint32_t DJBHash_s (char *str) { unsigned int hash = 5381; unsigned char c; while ((c = *str++)) hash = ((hash << 5) + hash) + c; return hash; } // Модифицировано начальное значение хэша (в оригинале длина строки) uint32_t DEKHash_s (char *str) { unsigned int hash = 0; unsigned char c; while ((c = *str++)) hash = ((hash << 5) ^ (hash >> 27)) ^ c; return hash; } uint32_t APHash_s (char *str) { unsigned int hash = 0; unsigned int i; unsigned char c; for (i = 0; (c = str[i]); i++) { hash ^= ((i & 1) == 0) ? ((hash << 7) ^ c ^ (hash >> 3)) : (~((hash << 11) ^ c ^ (hash >> 5))); } return hash; } uint32_t LYHash_s (char *str) { unsigned int hash = 0; unsigned char c; while ((c = *str++)) hash = (hash * 1664525) + c + 1013904223; return hash; } uint32_t ROT13Hash_s (char *str) { unsigned int hash = 0; unsigned char c; while ((c = *str++)) { hash += c; hash -= (hash << 13) | (hash >> 19); } return hash; } uint32_t bob_faq6_hash_s (char *str) { unsigned int hash = 0; unsigned char c; while ((c = *str++)) { hash += c; hash += (hash << 10); hash ^= (hash >> 6); } hash += (hash << 3); hash ^= (hash >> 11); hash += (hash << 15); return hash; } /* misc references cited: Julienne Walker, "Eternally Confuzzled - The Art of Hashing" http://www.eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx FNV hash http://www.isthe.com/chongo/tech/comp/fnv/ Bob Jenkins on hash functions http://burtleburtle.net/bob/hash/doobs.html http://burtleburtle.net/bob/hash/examhash.html Paul Hsieh, Super Fast Hash http://www.azillionmonkeys.com/qed/hash.html Peter Kankowski, An empirical comparison http://www.strchr.com/hash_functions Austin Appleby, Murmur Hash http://sites.google.com/site/murmurhash/ Functions implementation for General Purpose Hash Function Algorithms http://vak.ru/doku.php/proj/hash/sources (links current at 2017.01.15) */

Комментариев нет:

Отправить комментарий