尽可能使用主键或者索引
使用主键缩小范围:
SELECT * FROM tablename where uid >= 7948212 and uid < 7948312 ORDER BY uid LIMIT 100
explain 性能:
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t_userfriends range PRIMARY PRIMARY 4 1 100.00 Using where
使用主键比 limit + offset 快得多, 特别是100w行以上的大表
使用偏移量:
SELECT * FROM tablename ORDER BY uid LIMIT 100, OFFSET 100200
explain 性能:
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t_userfriends ALL 2789921 100.00
offset越大, mysql引擎查询时, 跳过的行越多, 越耗时
使用并发查询
开启N个协程或线程, 根据实际的mysql配置来评估, 一般推荐3-10
每个协程读取的主键范围,独自计算, 是唯一的, 起始主键都不同
// 并行读取db,根据主键范围
func (a *Repo) scanDbV3() {var ctx = context.Background()var l sync.Mutexvar wg sync.WaitGroupvar totalNum int64var dirtyNum int64var info dbModel.Infovar start = time.Now()var batchSize int64 = 3var pageSize int64 = 100var minUid int64 = 100000var maxUid int64 = 10000000// 寻找最大uiderr := a.db.Select("uid", "list").Order("uid DESC").Take(&info).Errorif err != nil {log.Printf("寻找最大uid failed:%v\n", err)return}maxUid = info.Uidlog.Printf("数据表中最大uid: %v\n", maxUid)readDbRows := func(startUid int64) {defer wg.Done()for {var infos = make([]*dbModel.Info, 0, pageSize)err := a.db.Select("uid", "list").Where("uid > ? AND uid <= ?", startUid, startUid+pageSize).Order("uid").Find(&infos).Errorif err != nil {log.Printf("协程读取数据出错,startUid %d: %v\n", startUid, err)break}if len(infos) == 0 && startUid > maxUid {log.Printf("协程读取数据完毕,startUid %d\n", startUid)break}atomic.AddInt64(&totalNum, int64(len(infos)))// 处理查询到的记录wg.Add(1)task := func() {defer wg.Done()// 处理业务}err = a.pool.Submit(task)if err != nil {log.Printf("pool.Submit failed:%v\n", err)}startUid += batchSize * pageSize // 4 个协程,每个协程偏移量间隔 400}}// 启动 4 个协程wg.Add(int(batchSize))for i := int64(0); i < batchSize; i++ {go readDbRows(minUid + i*pageSize)}defer func() {cost := time.Now().Sub(start)log.Printf("花费时间:%v, 表格上的人数:%v, 处理用户数据异常的玩家人数:%v \n", cost, totalNum, dirtyNum)}()wg.Wait()
}