您的位置:首页 > 游戏 > 游戏 > 使用milvus-sdk-go的迭代器导出数据

使用milvus-sdk-go的迭代器导出数据

2024/10/4 0:57:20 来源:https://blog.csdn.net/shulu/article/details/140468390  浏览:    关键词:使用milvus-sdk-go的迭代器导出数据

使用milvus-sdk-go的迭代器导出数据

迭代器是一种功能强大的工具,可帮助您使用主键值和布尔表达式迭代集合中的大量数据或所有数据。这可以显著改善您检索数据的方式。传统的 offset + limit 参数用法在偏移量变大后效率会越来越低,相比之下,迭代器提供了更具可扩展性的解决方案。

当表数据很大,需要全量导出,我们可以使用迭代器,例如每次只查询1000行数据,直到所有数据查询完成,同时也可以减少服务器压力。

需要注意的是迭代器是一个客户端实现。

下面列举一个例子:写入3000条数据,每次读取100条,直至全部读取完毕。

// Command main demonstrates exporting the contents of a Milvus collection in
// fixed-size batches with the milvus-sdk-go query iterator. It creates a
// collection, inserts nEntities random rows, reads them back batchSize rows
// at a time until the iterator reports io.EOF, and finally drops the
// collection again.
package main

import (
	"context"
	"errors"
	"fmt"
	"io"
	"log"
	"math/rand"
	"strconv"

	"github.com/milvus-io/milvus-sdk-go/v2/client"
	"github.com/milvus-io/milvus-sdk-go/v2/entity"
)

const (
	milvusAddr     = `192.168.230.71:19530`
	nEntities, dim = 3000, 128
	collectionName = "hello_iterator"

	msgFmt                                     = "==== %s ====\n"
	idCol, randomCol, addressCol, embeddingCol = "ID", "random", "address", "embeddings"

	// nlist is the second argument passed to entity.NewIndexIvfFlat.
	nlist = 128
	// batchSize is the number of rows requested per iterator.Next call.
	batchSize = 100
)

// main runs the demo and terminates the process with a non-zero status on the
// first error.
func main() {
	if err := run(context.Background()); err != nil {
		log.Fatal(err)
	}
}

// run executes the whole demo. Errors are returned instead of calling
// log.Fatal in place so that the deferred client Close still runs before the
// process exits.
func run(ctx context.Context) error {
	log.Printf(msgFmt, "start connecting to Milvus")
	c, err := client.NewClient(ctx, client.Config{
		Address: milvusAddr,
	})
	if err != nil {
		return fmt.Errorf("failed to connect to milvus, err: %w", err)
	}
	defer c.Close()

	if err := recreateCollection(ctx, c); err != nil {
		return err
	}
	if err := insertRandomEntities(ctx, c); err != nil {
		return err
	}
	if err := exportWithIterator(ctx, c); err != nil {
		return err
	}

	log.Printf(msgFmt, fmt.Sprintf("drop collection `%s`", collectionName))
	if err := c.DropCollection(ctx, collectionName); err != nil {
		return fmt.Errorf("failed to drop collection, err: %w", err)
	}
	return nil
}

// recreateCollection drops collectionName if it already exists, creates it
// with the demo schema, builds an IVF_FLAT index on the embedding field and
// loads the collection.
func recreateCollection(ctx context.Context, c client.Client) error {
	has, err := c.HasCollection(ctx, collectionName)
	if err != nil {
		return fmt.Errorf("failed to check collection exists, err: %w", err)
	}
	if has {
		// A failed drop would otherwise only show up as a confusing
		// CreateCollection error below.
		if err := c.DropCollection(ctx, collectionName); err != nil {
			return fmt.Errorf("failed to drop existing collection, err: %w", err)
		}
	}

	log.Printf(msgFmt, fmt.Sprintf("create collection, `%s`", collectionName))
	schema := entity.NewSchema().
		WithName(collectionName).
		WithDescription("hello_milvus is the simplest demo to introduce the APIs").
		WithField(entity.NewField().WithName(idCol).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(false)).
		WithField(entity.NewField().WithName(randomCol).WithDataType(entity.FieldTypeDouble)).
		WithField(entity.NewField().WithName(addressCol).WithDataType(entity.FieldTypeVarChar).WithTypeParams(entity.TypeParamMaxLength, "50")).
		WithField(entity.NewField().WithName(embeddingCol).WithDataType(entity.FieldTypeFloatVector).WithDim(dim))

	// Use the default shard number.
	if err := c.CreateCollection(ctx, schema, entity.DefaultShardNumber); err != nil {
		return fmt.Errorf("create collection failed, err: %w", err)
	}

	log.Printf(msgFmt, "start creating index IVF_FLAT")
	idx, err := entity.NewIndexIvfFlat(entity.L2, nlist)
	if err != nil {
		return fmt.Errorf("failed to create ivf flat index, err: %w", err)
	}
	if err := c.CreateIndex(ctx, collectionName, embeddingCol, idx, false); err != nil {
		return fmt.Errorf("failed to create index, err: %w", err)
	}

	log.Printf(msgFmt, "start loading collection")
	if err := c.LoadCollection(ctx, collectionName, false); err != nil {
		return fmt.Errorf("failed to load collection, err: %w", err)
	}
	return nil
}

// generateColumns builds the four column payloads for nEntities rows:
// sequential IDs, random doubles, "wuhan<i>" addresses and random
// dim-dimensional float vectors.
func generateColumns() []entity.Column {
	ids := make([]int64, nEntities)
	for i := range ids {
		ids[i] = int64(i)
	}

	randoms := make([]float64, nEntities)
	for i := range randoms {
		randoms[i] = rand.Float64()
	}

	addresses := make([]string, nEntities)
	for i := range addresses {
		addresses[i] = "wuhan" + strconv.Itoa(i)
	}

	embeddings := make([][]float32, nEntities)
	for i := range embeddings {
		vec := make([]float32, dim)
		for j := range vec {
			vec[j] = rand.Float32()
		}
		embeddings[i] = vec
	}

	return []entity.Column{
		entity.NewColumnInt64(idCol, ids),
		entity.NewColumnDouble(randomCol, randoms),
		entity.NewColumnVarChar(addressCol, addresses),
		entity.NewColumnFloatVector(embeddingCol, dim, embeddings),
	}
}

// insertRandomEntities inserts the generated rows into collectionName and
// then flushes the collection.
func insertRandomEntities(ctx context.Context, c client.Client) error {
	log.Printf(msgFmt, "start inserting random entities")
	if _, err := c.Insert(ctx, collectionName, "", generateColumns()...); err != nil {
		return fmt.Errorf("failed to insert random data into `%s`, err: %w", collectionName, err)
	}
	if err := c.Flush(ctx, collectionName, false); err != nil {
		return fmt.Errorf("failed to flush data, err: %w", err)
	}
	return nil
}

// exportWithIterator reads collectionName through a query iterator,
// requesting batchSize rows per call, and logs the IDs and random values of
// every batch. It returns nil once the iterator reports io.EOF.
func exportWithIterator(ctx context.Context, c client.Client) error {
	opt := client.NewQueryIteratorOption(collectionName).
		WithOutputFields(idCol, randomCol, embeddingCol).
		WithBatchSize(batchSize)
	itr, err := c.QueryIterator(ctx, opt)
	if err != nil {
		return fmt.Errorf("failed to query iterator: %w", err)
	}

	for {
		rs, err := itr.Next(ctx)
		if errors.Is(err, io.EOF) {
			log.Println("iterator reach EOF")
			return nil
		}
		if err != nil {
			return fmt.Errorf("failed to query iterator. next: %w", err)
		}

		ids, randoms, err := extractBatch(rs)
		if err != nil {
			return err
		}
		log.Printf("\tids: %#v\n", ids)
		log.Printf("\trandoms: %#v\n", randoms)
	}
}

// extractBatch pulls the idCol and randomCol values out of one batch of
// result columns. Columns with other names are ignored. It returns an error
// instead of panicking when a column does not have the expected concrete type.
func extractBatch(cols []entity.Column) ([]int64, []float64, error) {
	var (
		ids     []int64
		randoms []float64
	)
	for _, col := range cols {
		switch col.Name() {
		case idCol:
			idColumn, ok := col.(*entity.ColumnInt64)
			if !ok {
				return nil, nil, fmt.Errorf("column %q has unexpected type %T", idCol, col)
			}
			for i := 0; i < idColumn.Len(); i++ {
				val, err := idColumn.ValueByIdx(i)
				if err != nil {
					return nil, nil, err
				}
				ids = append(ids, val)
			}
		case randomCol:
			randomColumn, ok := col.(*entity.ColumnDouble)
			if !ok {
				return nil, nil, fmt.Errorf("column %q has unexpected type %T", randomCol, col)
			}
			for i := 0; i < randomColumn.Len(); i++ {
				val, err := randomColumn.ValueByIdx(i)
				if err != nil {
					return nil, nil, err
				}
				randoms = append(randoms, val)
			}
		}
	}
	return ids, randoms, nil
}

版权声明:

本网仅为发布的内容提供存储空间,不对发表、转载的内容提供任何形式的保证。凡本网注明“来源:XXX网络”的作品,均转载自其它媒体,著作权归作者所有,商业转载请联系作者获得授权,非商业转载请注明出处。

我们尊重并感谢每一位作者,均已注明文章来源和作者。如因作品内容、版权或其它问题,请及时与我们联系,联系邮箱:809451989@qq.com,投稿邮箱:809451989@qq.com