gguf.cpp核心代码完整代码#include ggml.h #include ggml-backend.h #include ggml-impl.h #include gguf.h #include cinttypes #include cstddef #include cstdint #include cstdio #include cstdlib #include cstring #include map #include new #include stdexcept #include string #include vector #define GGUF_MAX_STRING_LENGTH (1024*1024*1024) #define GGUF_MAX_ARRAY_ELEMENTS (1024*1024*1024) #ifdef _WIN32 # define gguf_ftell _ftelli64 # define gguf_fseek _fseeki64 #else # define gguf_ftell ftello # define gguf_fseek fseeko #endif template typename T struct type_to_gguf_type; template struct type_to_gguf_typeuint8_t { static constexpr enum gguf_type value GGUF_TYPE_UINT8; }; template struct type_to_gguf_typeint8_t { static constexpr enum gguf_type value GGUF_TYPE_INT8; }; template struct type_to_gguf_typeuint16_t { static constexpr enum gguf_type value GGUF_TYPE_UINT16; }; template struct type_to_gguf_typeint16_t { static constexpr enum gguf_type value GGUF_TYPE_INT16; }; template struct type_to_gguf_typeuint32_t { static constexpr enum gguf_type value GGUF_TYPE_UINT32; }; template struct type_to_gguf_typeint32_t { static constexpr enum gguf_type value GGUF_TYPE_INT32; }; template struct type_to_gguf_typefloat { static constexpr enum gguf_type value GGUF_TYPE_FLOAT32; }; template struct type_to_gguf_typebool { static constexpr enum gguf_type value GGUF_TYPE_BOOL; }; template struct type_to_gguf_typestd::string { static constexpr enum gguf_type value GGUF_TYPE_STRING; }; template struct type_to_gguf_typeuint64_t { static constexpr enum gguf_type value GGUF_TYPE_UINT64; }; template struct type_to_gguf_typeint64_t { static constexpr enum gguf_type value GGUF_TYPE_INT64; }; template struct type_to_gguf_typedouble { static constexpr enum gguf_type value GGUF_TYPE_FLOAT64; }; static const std::mapgguf_type, size_t GGUF_TYPE_SIZE { {GGUF_TYPE_UINT8, sizeof(uint8_t)}, {GGUF_TYPE_INT8, sizeof(int8_t)}, {GGUF_TYPE_UINT16, sizeof(uint16_t)}, {GGUF_TYPE_INT16, sizeof(int16_t)}, {GGUF_TYPE_UINT32, 
sizeof(uint32_t)}, {GGUF_TYPE_INT32, sizeof(int32_t)}, {GGUF_TYPE_FLOAT32, sizeof(float)}, {GGUF_TYPE_BOOL, sizeof(int8_t)}, {GGUF_TYPE_STRING, 0}, // undefined {GGUF_TYPE_ARRAY, 0}, // undefined {GGUF_TYPE_UINT64, sizeof(uint64_t)}, {GGUF_TYPE_INT64, sizeof(int64_t)}, {GGUF_TYPE_FLOAT64, sizeof(double)}, }; static_assert(GGUF_TYPE_COUNT 13, GGUF_TYPE_COUNT ! 13); static const std::mapgguf_type, const char * GGUF_TYPE_NAME { {GGUF_TYPE_UINT8, u8}, {GGUF_TYPE_INT8, i8}, {GGUF_TYPE_UINT16, u16}, {GGUF_TYPE_INT16, i16}, {GGUF_TYPE_UINT32, u32}, {GGUF_TYPE_INT32, i32}, {GGUF_TYPE_FLOAT32, f32}, {GGUF_TYPE_BOOL, bool}, {GGUF_TYPE_STRING, str}, {GGUF_TYPE_ARRAY, arr}, {GGUF_TYPE_UINT64, u64}, {GGUF_TYPE_INT64, i64}, {GGUF_TYPE_FLOAT64, f64}, }; static_assert(GGUF_TYPE_COUNT 13, GGUF_TYPE_COUNT ! 13); size_t gguf_type_size(enum gguf_type type) { auto it GGUF_TYPE_SIZE.find(type); return it GGUF_TYPE_SIZE.end() ? 0 : it-second; } struct gguf_kv { std::string key; bool is_array; enum gguf_type type; std::vectorint8_t data; std::vectorstd::string data_string; template typename T gguf_kv(const std::string key, const T value) : key(key), is_array(false), type(type_to_gguf_typeT::value) { GGML_ASSERT(!key.empty()); data.resize(sizeof(T)); memcpy(data.data(), value, sizeof(T)); } template typename T gguf_kv(const std::string key, const std::vectorT value) : key(key), is_array(true), type(type_to_gguf_typeT::value) { GGML_ASSERT(!key.empty()); data.resize(value.size()*sizeof(T)); for (size_t i 0; i value.size(); i) { const T tmp value[i]; memcpy(data.data() i*sizeof(T), tmp, sizeof(T)); } } gguf_kv(const std::string key, const std::string value) : key(key), is_array(false), type(GGUF_TYPE_STRING) { GGML_ASSERT(!key.empty()); data_string.push_back(value); } gguf_kv(const std::string key, const std::vectorstd::string value) : key(key), is_array(true), type(GGUF_TYPE_STRING) { GGML_ASSERT(!key.empty()); data_string value; } const std::string get_key() const { return key; } const 
enum gguf_type get_type() const { return type; } size_t get_ne() const { if (type GGUF_TYPE_STRING) { const size_t ne data_string.size(); GGML_ASSERT(is_array || ne 1); return ne; } const size_t type_size gguf_type_size(type); GGML_ASSERT(data.size() % type_size 0); const size_t ne data.size() / type_size; GGML_ASSERT(is_array || ne 1); return ne; } template typename T const T get_val(const size_t i 0) const { GGML_ASSERT(type_to_gguf_typeT::value type); if constexpr (std::is_sameT, std::string::value) { GGML_ASSERT(data_string.size() i1); return data_string[i]; } const size_t type_size gguf_type_size(type); GGML_ASSERT(data.size() % type_size 0); GGML_ASSERT(data.size() (i1)*type_size); return reinterpret_castconst T *(data.data())[i]; } void cast(const enum gguf_type new_type) { const size_t new_type_size gguf_type_size(new_type); GGML_ASSERT(data.size() % new_type_size 0); type new_type; } }; struct gguf_tensor_info { struct ggml_tensor t; // for holding the equivalent info uint64_t offset; // offset from start of data, must be a multiple of ALIGNMENT }; struct gguf_context { uint32_t version GGUF_VERSION; std::vectorstruct gguf_kv kv; std::vectorstruct gguf_tensor_info info; size_t alignment GGUF_DEFAULT_ALIGNMENT; size_t offset 0; // offset of data from beginning of file size_t size 0; // size of data in bytes void * data nullptr; }; struct gguf_reader { gguf_reader(FILE * file) : file(file) { // read the remaining bytes once and update on each read nbytes_remain file_remain(file); } // helper for remaining bytes in a file static uint64_t file_remain(FILE * file) { const int64_t cur gguf_ftell(file); if (cur 0) { return 0; } if (gguf_fseek(file, 0, SEEK_END) ! 
0) { gguf_fseek(file, cur, SEEK_SET); return 0; } const int64_t end gguf_ftell(file); if (end 0) { gguf_fseek(file, cur, SEEK_SET); return 0; } gguf_fseek(file, cur, SEEK_SET); return static_castuint64_t(end - cur); } template typename T bool read(T dst) const { const size_t size sizeof(dst); if (nbytes_remain size) { return false; } const size_t nread fread(dst, 1, size, file); nbytes_remain - nread; return nread size; } template typename T bool read(std::vectorT dst, const size_t n) const { if (n GGUF_MAX_ARRAY_ELEMENTS) { return false; } if constexpr (std::is_sameT, std::string::value) { // strings are prefixed with their length, so we need to account for that if (n SIZE_MAX / sizeof(uint64_t)) { return false; } if (nbytes_remain n * sizeof(uint64_t)) { return false; } } else { if (n SIZE_MAX / sizeof(T)) { return false; } if (nbytes_remain n * sizeof(T)) { return false; } } dst.resize(n); for (size_t i 0; i dst.size(); i) { if constexpr (std::is_sameT, bool::value) { bool tmp; if (!read(tmp)) { return false; } dst[i] tmp; } else { if (!read(dst[i])) { return false; } } } return true; } bool read(bool dst) const { int8_t tmp -1; if (!read(tmp)) { return false; } dst tmp ! 
0; return true; } bool read(enum ggml_type dst) const { int32_t tmp -1; if (!read(tmp)) { return false; } dst ggml_type(tmp); return true; } bool read(enum gguf_type dst) const { int32_t tmp -1; if (!read(tmp)) { return false; } dst gguf_type(tmp); return true; } bool read(std::string dst) const { uint64_t size 0; if (!read(size)) { return false; } if (size GGUF_MAX_STRING_LENGTH) { GGML_LOG_ERROR(%s: string length % PRIu64 exceeds maximum % PRIu64 \n, __func__, size, (uint64_t) GGUF_MAX_STRING_LENGTH); return false; } if (size nbytes_remain) { GGML_LOG_ERROR(%s: string length % PRIu64 exceeds remaining file size % PRIu64 bytes\n, __func__, size, nbytes_remain); return false; } dst.resize(static_castsize_t(size)); const size_t nread fread(dst.data(), 1, size, file); nbytes_remain - nread; return nread size; } bool read(void * dst, const size_t size) const { if (size nbytes_remain) { return false; } const size_t nread fread(dst, 1, size, file); nbytes_remain - nread; return nread size; } private: FILE * file; mutable uint64_t nbytes_remain; }; struct gguf_context * gguf_init_empty(void) { return new gguf_context; } templatetypename T bool gguf_read_emplace_helper(const struct gguf_reader gr, std::vectorstruct gguf_kv kv, const std::string key, const bool is_array, const size_t n) { if (is_array) { std::vectorT value; try { if (!gr.read(value, n)) { return false; } } catch (std::length_error ) { GGML_LOG_ERROR(%s: encountered length_error while reading value for key %s\n, __func__, key.c_str()); return false; } catch (std::bad_alloc ) { GGML_LOG_ERROR(%s: encountered bad_alloc error while reading value for key %s\n, __func__, key.c_str()); return false; } kv.emplace_back(key, value); } else { T value; if (!gr.read(value)) { return false; } kv.emplace_back(key, value); } return true; } struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) { const struct gguf_reader gr(file); struct gguf_context * ctx new gguf_context; bool ok 
true; // file magic { std::vectorchar magic; ok ok gr.read(magic, 4); if (!ok) { GGML_LOG_ERROR(%s: failed to read magic\n, __func__); gguf_free(ctx); return nullptr; } for (uint32_t i 0; i magic.size(); i) { if (magic[i] ! GGUF_MAGIC[i]) { char c0 isprint(magic[0]) ? magic[0] : ?; char c1 isprint(magic[1]) ? magic[1] : ?; char c2 isprint(magic[2]) ? magic[2] : ?; char c3 isprint(magic[3]) ? magic[3] : ?; GGML_LOG_ERROR(%s: invalid magic characters: %c%c%c%c, expected GGUF\n, __func__, c0, c1, c2, c3); gguf_free(ctx); return nullptr; } } } // header int64_t n_kv 0; int64_t n_tensors 0; if (ok gr.read(ctx-version)) { if (ok ctx-version 0) { GGML_LOG_ERROR(%s: bad GGUF version: % PRIu32 \n, __func__, ctx-version); ok false; } /* * bit layout is different when reading non-native endian models. * assuming that the GGUF version is 3, the non-native endian model * would read it as 0x30000000. we can use the AND operation against * the last 4 hexadecimal digits to check if the model is the same * endianness as the host system. 
*/ if (ok (ctx-version 0x0000FFFF) 0x00000000) { GGML_LOG_ERROR(%s: failed to load model: this GGUF file version % PRIu32 is extremely large, is there a mismatch between the host and model endianness?\n, __func__, ctx-version); ok false; } if (ok ctx-version 1) { GGML_LOG_ERROR(%s: GGUFv1 is no longer supported, please use a more up-to-date version\n, __func__); ok false; } if (ok ctx-version GGUF_VERSION) { GGML_LOG_ERROR(%s: this GGUF file is version % PRIu32 but this software only supports up to version %d\n, __func__, ctx-version, GGUF_VERSION); ok false; } } else { ok false; } if (ok gr.read(n_tensors)) { static_assert(sizeof(size_t) 8 sizeof(gguf_tensor_info) 2, int64_t insufficient for indexing); if (n_tensors 0 || n_tensors int64_t(SIZE_MAX/sizeof(gguf_tensor_info))) { GGML_LOG_ERROR(%s: number of tensors is % PRIi64 but must be in [0, %zu]\n, __func__, n_tensors, SIZE_MAX/sizeof(gguf_tensor_info)); ok false; } } else { ok false; } if (ok gr.read(n_kv)) { static_assert(sizeof(size_t) 8 sizeof(gguf_tensor_info) 2, int64_t insufficient for indexing); if (n_kv 0 || n_kv int64_t(SIZE_MAX/sizeof(gguf_kv))) { GGML_LOG_ERROR(%s: number of key value pairs is % PRIi64 but must be in [0, %zu]\n, __func__, n_kv, SIZE_MAX/sizeof(gguf_kv)); ok false; } } else { ok false; } if (!ok) { GGML_LOG_ERROR(%s: failed to read header\n, __func__); gguf_free(ctx); return nullptr; } // KV pairs { for (int64_t i 0; ok i n_kv; i) { std::string key; gguf_type type gguf_type(-1); bool is_array false; uint64_t n 1; try { ok ok gr.read(key); } catch (std::length_error ) { GGML_LOG_ERROR(%s: encountered length_error while reading key % PRIi64 \n, __func__, i); ok false; } catch (std::bad_alloc ) { GGML_LOG_ERROR(%s: encountered bad_alloc error while reading key % PRIi64 \n, __func__, i); ok false; } for (size_t j 0; ok j ctx-kv.size(); j) { if (key ctx-kv[j].key) { GGML_LOG_ERROR(%s: duplicate key %s for tensors %zu and % PRIi64 \n, __func__, key.c_str(), j, i); ok false; } } if (!ok) { 
break; } ok ok gr.read(type); if (type GGUF_TYPE_ARRAY) { is_array true; ok ok gr.read(type); ok ok gr.read(n); } if (!ok) { break; } switch (type) { case GGUF_TYPE_UINT8: ok ok gguf_read_emplace_helperuint8_t (gr, ctx-kv, key, is_array, n); break; case GGUF_TYPE_INT8: ok ok gguf_read_emplace_helperint8_t (gr, ctx-kv, key, is_array, n); break; case GGUF_TYPE_UINT16: ok ok gguf_read_emplace_helperuint16_t (gr, ctx-kv, key, is_array, n); break; case GGUF_TYPE_INT16: ok ok gguf_read_emplace_helperint16_t (gr, ctx-kv, key, is_array, n); break; case GGUF_TYPE_UINT32: ok ok gguf_read_emplace_helperuint32_t (gr, ctx-kv, key, is_array, n); break; case GGUF_TYPE_INT32: ok ok gguf_read_emplace_helperint32_t (gr, ctx-kv, key, is_array, n); break; case GGUF_TYPE_FLOAT32: ok ok gguf_read_emplace_helperfloat (gr, ctx-kv, key, is_array, n); break; case GGUF_TYPE_BOOL: ok ok gguf_read_emplace_helperbool (gr, ctx-kv, key, is_array, n); break; case GGUF_TYPE_STRING: ok ok gguf_read_emplace_helperstd::string(gr, ctx-kv, key, is_array, n); break; case GGUF_TYPE_UINT64: ok ok gguf_read_emplace_helperuint64_t (gr, ctx-kv, key, is_array, n); break; case GGUF_TYPE_INT64: ok ok gguf_read_emplace_helperint64_t (gr, ctx-kv, key, is_array, n); break; case GGUF_TYPE_FLOAT64: ok ok gguf_read_emplace_helperdouble (gr, ctx-kv, key, is_array, n); break; case GGUF_TYPE_ARRAY: default: { GGML_LOG_ERROR(%s: key %s has invalid GGUF type %d\n, __func__, key.c_str(), type); ok false; } break; } } if (!ok) { GGML_LOG_ERROR(%s: failed to read key-value pairs\n, __func__); gguf_free(ctx); return nullptr; } GGML_ASSERT(int64_t(ctx-kv.size()) n_kv); const int alignment_idx gguf_find_key(ctx, GGUF_KEY_GENERAL_ALIGNMENT); ctx-alignment alignment_idx -1 ? GGUF_DEFAULT_ALIGNMENT : gguf_get_val_u32(ctx, alignment_idx); if (ctx-alignment 0 || (ctx-alignment (ctx-alignment - 1)) ! 
0) { GGML_LOG_ERROR(%s: alignment %zu is not a power of 2\n, __func__, ctx-alignment); gguf_free(ctx); return nullptr; } } // read the tensor info for (int64_t i 0; ok i n_tensors; i) { struct gguf_tensor_info info; // tensor name { std::string name; try { ok ok gr.read(name); } catch (std::length_error ) { GGML_LOG_ERROR(%s: encountered length_error while reading tensor name % PRIi64 \n, __func__, i); ok false; } catch (std::bad_alloc ) { GGML_LOG_ERROR(%s: encountered bad_alloc error while reading tensor name % PRIi64 \n, __func__, i); ok false; } if (name.length() GGML_MAX_NAME) { GGML_LOG_ERROR(%s: tensor name % PRIi64 is too long: %zu %d\n, __func__, i, name.length(), GGML_MAX_NAME); ok false; break; } ggml_set_name(info.t, name.c_str()); // make sure there are no duplicate tensor names for (int64_t j 0; ok j i; j) { if (strcmp(info.t.name, ctx-info[j].t.name) 0) { GGML_LOG_ERROR(%s: duplicate tensor name %s for tensors % PRIi64 and % PRIi64 \n, __func__, info.t.name, j, i); ok false; break; } } } if (!ok) { break; } // tensor shape { uint32_t n_dims 0; ok ok gr.read(n_dims); if (n_dims GGML_MAX_DIMS) { GGML_LOG_ERROR(%s: tensor %s has invalid number of dimensions: % PRIu32 % PRIu32 \n, __func__, info.t.name, n_dims, GGML_MAX_DIMS); ok false; break; } for (uint32_t j 0; ok j GGML_MAX_DIMS; j) { info.t.ne[j] 1; if (j n_dims) { ok ok gr.read(info.t.ne[j]); } // check that all ne are non-negative if (info.t.ne[j] 0) { GGML_LOG_ERROR(%s: tensor %s dimension % PRIu32 has invalid number of elements: % PRIi64 0\n, __func__, info.t.name, j, info.t.ne[j]); ok false; break; } } // check that the total number of elements is representable if (ok ((INT64_MAX/info.t.ne[1] info.t.ne[0]) || (INT64_MAX/info.t.ne[2] info.t.ne[0]*info.t.ne[1]) || (INT64_MAX/info.t.ne[3] info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) { GGML_LOG_ERROR(%s: total number of elements in tensor %s with shape (% PRIi64 , % PRIi64 , % PRIi64 , % PRIi64 ) is % PRIi64 \n, __func__, info.t.name, info.t.ne[0], 
info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX); ok false; break; } } if (!ok) { break; } // tensor type { ok ok gr.read(info.t.type); // check that tensor type is within defined range if (info.t.type 0 || info.t.type GGML_TYPE_COUNT) { GGML_LOG_ERROR(%s: tensor %s has invalid ggml type %d. should be in [0, %d)\n, __func__, info.t.name, info.t.type, GGML_TYPE_COUNT); ok false; break; } const size_t type_size ggml_type_size(info.t.type); const int64_t blck_size ggml_blck_size(info.t.type); // check that row size is divisible by block size if (blck_size 0 || info.t.ne[0] % blck_size ! 0) { GGML_LOG_ERROR(%s: tensor %s of type %d (%s) has % PRId64 elements per row, not a multiple of block size (% PRId64 )\n, __func__, info.t.name, (int) info.t.type, ggml_type_name(info.t.type), info.t.ne[0], blck_size); ok false; break; } // check that the size of the tensor in bytes is representable if (ok uint64_t(ggml_nelements(info.t)/ggml_blck_size(info.t.type)) SIZE_MAX/ggml_type_size(info.t.type)) { GGML_LOG_ERROR(%s: tensor %s with shape (% PRIi64 , % PRIi64 , % PRIi64 , % PRIi64 ) has a size in bytes %zu\n, __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], SIZE_MAX); ok false; break; } // calculate byte offsets given the tensor shape and type info.t.nb[0] type_size; info.t.nb[1] info.t.nb[0]*(info.t.ne[0]/blck_size); for (int j 2; j GGML_MAX_DIMS; j) { info.t.nb[j] info.t.nb[j - 1]*info.t.ne[j - 1]; } } if (!ok) { break; } // tensor data offset within buffer ok ok gr.read(info.offset); ctx-info.push_back(info); } if (!ok) { GGML_LOG_ERROR(%s: failed to read tensor info\n, __func__); gguf_free(ctx); return nullptr; } GGML_ASSERT(int64_t(ctx-info.size()) n_tensors); // we require the data section to be aligned, so take into account any padding if (gguf_fseek(file, GGML_PAD(gguf_ftell(file), ctx-alignment), SEEK_SET) ! 
0) { GGML_LOG_ERROR(%s: failed to seek to beginning of data section\n, __func__); gguf_free(ctx); return nullptr; } // store the current file offset - this is where the data section starts ctx-offset gguf_ftell(file); // compute the total size of the data section, taking into account the alignment { ctx-size 0; for (size_t i 0; i ctx-info.size(); i) { const gguf_tensor_info ti ctx-info[i]; if (ti.offset ! ctx-size) { GGML_LOG_ERROR(%s: tensor %s has offset % PRIu64 , expected %zu\n, __func__, ti.t.name, ti.offset, ctx-size); GGML_LOG_ERROR(%s: failed to read tensor data\n, __func__); gguf_free(ctx); return nullptr; } size_t padded_size GGML_PAD(ggml_nbytes(ti.t), ctx-alignment); if (SIZE_MAX - ctx-size padded_size) { GGML_LOG_ERROR(%s: tensor %s size overflow, cannot accumulate size %zu %zu\n, __func__, ti.t.name, ctx-size, padded_size); gguf_free(ctx); return nullptr; } ctx-size padded_size; } } // load the tensor data only if requested if (params.ctx ! nullptr) { // if the provided gguf_context is no_alloc, then we create empty tensors and do not read the binary blob // otherwise, we load the binary blob into the created ggml_context as well, and point the data members of // the ggml_tensor structs to the appropriate locations in the binary blob // compute the exact size needed for the new ggml_context size_t mem_size 0; if (params.no_alloc) { if (n_tensors ! 0 SIZE_MAX / n_tensors ggml_tensor_overhead()) { GGML_LOG_ERROR(%s: memory size overflow while allocating ggml context\n, __func__); gguf_free(ctx); return nullptr; } const size_t overhead n_tensors * ggml_tensor_overhead(); mem_size overhead; } else { if ((n_tensors 1) ! 
0 SIZE_MAX / (n_tensors 1) ggml_tensor_overhead()) { GGML_LOG_ERROR(%s: memory size overflow while allocating ggml context\n, __func__); gguf_free(ctx); return nullptr; } const size_t overhead (n_tensors 1) * ggml_tensor_overhead(); if (SIZE_MAX - overhead ctx-size) { GGML_LOG_ERROR(%s: memory size overflow while allocating ggml context\n, __func__); gguf_free(ctx); return nullptr; } mem_size overhead ctx-size; } struct ggml_init_params pdata { /*mem_size */ mem_size, /*mem_buffer */ nullptr, /*no_alloc */ params.no_alloc, }; *params.ctx ggml_init(pdata); if (*params.ctx nullptr) { GGML_LOG_ERROR(%s: failed to initialize ggml context for storing tensors\n, __func__); gguf_free(ctx); return nullptr; } struct ggml_context * ctx_data *params.ctx; struct ggml_tensor * data nullptr; if (!params.no_alloc) { data ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx-size); ok ok data ! nullptr; if (ok) { ggml_set_name(data, GGUF tensor data binary blob); } // read the binary blob with the tensor data ok ok gr.read(data-data, ctx-size); if (!ok) { GGML_LOG_ERROR(%s: failed to read tensor data binary blob\n, __func__); ggml_free(ctx_data); *params.ctx nullptr; gguf_free(ctx); return nullptr; } ctx-data >二、核心设计重点1. 类型映射系统最关键用模板特化把 C 基础类型int/float/string/bool 等和 GGUF 定义的类型一一绑定用两个静态映射表类型 → 占用字节大小类型 → 简短名称u8/i8/f32 等目的读写文件时自动匹配数据类型不混淆、不解析错误2. 核心数据结构gguf_kv存储 GGUF 的键值对模型版本、作者、上下文长度、层数量等配置支持普通值 数组 字符串统一存储、统一读取。gguf_tensor_info存储张量信息名字、形状、数据类型、在文件中的偏移位置。gguf_contextGGUF 文件的总容器版本、对齐规则所有键值对所有张量信息张量权重数据指针 / 内存gguf_reader/gguf_writer封装文件读写自动处理大小端、剩余字节校验统一读取任意类型基础类型 / 字符串 / 数组支持文件写入 内存缓冲区写入3. 安全与校验机制最大字符串 / 数组长度限制防止恶意文件崩溃程序重复键 / 张量名校验张量形状、数据类型合法性校验内存溢出、文件越界读取防护对齐校验GGUF 要求数据按指定字节对齐三、C 加载 GGUF 文件的完整流程重点这是你最关心的C 如何从 .gguf 文件加载模型按执行顺序讲1. 打开文件用二进制只读模式打开 GGUF 文件准备读取。2. 校验文件合法性读取文件头 4 字节魔法数GGUF不是则直接报错读取版本号只支持兼容版本拒绝过旧 / 过新文件3. 读取文件头部信息张量总数量键值对总数量文件对齐规则默认 32 字节4. 解析所有键值对循环读取所有 KV键名数据类型是否数组数据内容全部存入gguf_context后续可直接查询。5. 解析所有张量元信息循环读取每个张量张量名字维度形状如 4096×4096数据类型FP16/Q4_K 等权重数据在文件中的偏移位置全部存入张量列表。6. 
跳转到权重数据区：文件头部 / 元数据读完后，按对齐规则跳过填充字节，到达权重二进制区。7. 加载张量权重（可选）：计算所有张量总大小，申请内存，一次性读取所有权重二进制数据，把每个张量的指针指向对应内存偏移位置，集成到 GGML 计算上下文（大模型推理框架）。8. 返回完整上下文：返回 gguf_context 对象，包含所有模型配置、所有张量信息、权重数据内存指针，加载完成。四、代码核心能力总结：类型安全——严格绑定 C++ 类型与 GGUF 类型，解析零错误；完整读写——既能读 GGUF，也能写 / 修改 GGUF；模型集成——直接对接 GGML 张量系统，适合大模型推理；鲁棒性——全流程校验，防止崩溃、内存泄漏、文件损坏；高效——一次性读取权重，无冗余 IO。人人皆为创造者，共创方能共成长：每个人都是使用者，也是创造者；是数字世界的消费者，更是价值的生产者与分享者。在智能时代的浪潮里，单打独斗的发展模式早已落幕，唯有开放连接、创意共创、利益共享，才能让个体价值汇聚成生态合力，让技术与创意双向奔赴，实现平台与伙伴的快速成长、共赢致远。原创永久分成，共赴星辰大海：原创创意共创、永久收益分成，是东方仙盟始终坚守的核心理念。我们坚信，每一份原创智慧都值得被尊重与回馈，以永久分成锚定共创初心，让创意者长期享有价值红利，携手万千伙伴向着科技星辰大海笃定前行，拥抱硅基生命与数字智能交融的未来，共筑跨越时代的数字文明共同体。东方仙盟拥抱知识开源，共筑数字新生态：在全球化与数字化浪潮中，东方仙盟始终秉持开放协作、知识共享的理念，积极拥抱开源技术与开放标准。我们相信，唯有打破技术壁垒、汇聚全球智慧，才能真正推动行业的可持续发展。开源赋能中小商户：通过将前端异常检测、跨系统数据互联等核心能力开源化，东方仙盟为全球中小商户提供了低成本、高可靠的技术解决方案，让更多商家能够平等享受数字转型的红利。共建行业标准：我们积极参与国际技术社区，与全球开发者、合作伙伴共同制定开放协议与技术规范，推动跨境零售、文旅、餐饮等多业态的系统互联互通，构建更加公平、高效的数字生态。知识普惠，共促发展：通过开源社区、技术文档与培训体系，东方仙盟致力于将前沿技术转化为可落地的行业实践，赋能全球合作伙伴，共同培育创新人才，推动数字经济的普惠式增长。阿雪技术观：在科技发展浪潮中，我们不妨积极投身技术共享。不满足于做受益者，更要主动担当贡献者。无论是分享代码、撰写技术博客，还是参与开源项目维护改进，每一个微小举动都可能蕴含推动技术进步的巨大能量。东方仙盟是汇聚力量的天地，我们携手在此探索硅基生命，为科技进步添砖加瓦。Hey folks, in this wild tech-driven world, why not dive headfirst into the whole tech-sharing scene? Don't just be the one reaping all the benefits; step up and be a contributor too. Whether you're tossing out your code snippets, hammering out some tech blogs, or getting your hands dirty with maintaining and sprucing up open-source projects, every little thing you do might just end up being a massive force that pushes tech forward. And guess what? The Eastern FairyAlliance is this awesome place where we all come together. We're gonna team up and explore the whole silicon-based life thing, and in the process, we'll be fueling the growth of technology.