Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
569 lines
16 KiB
569 lines
16 KiB
// Copyright 2019 The Gitea Authors. All rights reserved. |
|
// Use of this source code is governed by a MIT-style |
|
// license that can be found in the LICENSE file. |
|
|
|
package queue |
|
|
|
import ( |
|
"context" |
|
"fmt" |
|
"runtime/pprof" |
|
"sync" |
|
"sync/atomic" |
|
"time" |
|
|
|
"code.gitea.io/gitea/modules/log" |
|
"code.gitea.io/gitea/modules/process" |
|
"code.gitea.io/gitea/modules/util" |
|
) |
|
|
|
// WorkerPool represent a dynamically growable worker pool for a |
|
// provided handler function. They have an internal channel which |
|
// they use to detect if there is a block and will grow and shrink in |
|
// response to demand as per configuration. |
|
type WorkerPool struct { |
|
lock sync.Mutex |
|
baseCtx context.Context |
|
baseCtxCancel context.CancelFunc |
|
baseCtxFinished process.FinishedFunc |
|
paused chan struct{} |
|
resumed chan struct{} |
|
cond *sync.Cond |
|
qid int64 |
|
maxNumberOfWorkers int |
|
numberOfWorkers int |
|
batchLength int |
|
handle HandlerFunc |
|
dataChan chan Data |
|
blockTimeout time.Duration |
|
boostTimeout time.Duration |
|
boostWorkers int |
|
numInQueue int64 |
|
} |
|
|
|
var ( |
|
_ Flushable = &WorkerPool{} |
|
_ ManagedPool = &WorkerPool{} |
|
) |
|
|
|
// WorkerPoolConfiguration is the basic configuration for a WorkerPool |
|
type WorkerPoolConfiguration struct { |
|
Name string |
|
QueueLength int |
|
BatchLength int |
|
BlockTimeout time.Duration |
|
BoostTimeout time.Duration |
|
BoostWorkers int |
|
MaxWorkers int |
|
} |
|
|
|
// NewWorkerPool creates a new worker pool |
|
func NewWorkerPool(handle HandlerFunc, config WorkerPoolConfiguration) *WorkerPool { |
|
ctx, cancel, finished := process.GetManager().AddTypedContext(context.Background(), fmt.Sprintf("Queue: %s", config.Name), process.SystemProcessType, false) |
|
|
|
dataChan := make(chan Data, config.QueueLength) |
|
pool := &WorkerPool{ |
|
baseCtx: ctx, |
|
baseCtxCancel: cancel, |
|
baseCtxFinished: finished, |
|
batchLength: config.BatchLength, |
|
dataChan: dataChan, |
|
resumed: closedChan, |
|
paused: make(chan struct{}), |
|
handle: handle, |
|
blockTimeout: config.BlockTimeout, |
|
boostTimeout: config.BoostTimeout, |
|
boostWorkers: config.BoostWorkers, |
|
maxNumberOfWorkers: config.MaxWorkers, |
|
} |
|
|
|
return pool |
|
} |
|
|
|
// Done returns when this worker pool's base context has been cancelled |
|
func (p *WorkerPool) Done() <-chan struct{} { |
|
return p.baseCtx.Done() |
|
} |
|
|
|
// Push pushes the data to the internal channel |
|
func (p *WorkerPool) Push(data Data) { |
|
atomic.AddInt64(&p.numInQueue, 1) |
|
p.lock.Lock() |
|
select { |
|
case <-p.paused: |
|
p.lock.Unlock() |
|
p.dataChan <- data |
|
return |
|
default: |
|
} |
|
|
|
if p.blockTimeout > 0 && p.boostTimeout > 0 && (p.numberOfWorkers <= p.maxNumberOfWorkers || p.maxNumberOfWorkers < 0) { |
|
if p.numberOfWorkers == 0 { |
|
p.zeroBoost() |
|
} else { |
|
p.lock.Unlock() |
|
} |
|
p.pushBoost(data) |
|
} else { |
|
p.lock.Unlock() |
|
p.dataChan <- data |
|
} |
|
} |
|
|
|
// HasNoWorkerScaling will return true if the queue has no workers, and has no worker boosting |
|
func (p *WorkerPool) HasNoWorkerScaling() bool { |
|
p.lock.Lock() |
|
defer p.lock.Unlock() |
|
return p.hasNoWorkerScaling() |
|
} |
|
|
|
func (p *WorkerPool) hasNoWorkerScaling() bool { |
|
return p.numberOfWorkers == 0 && (p.boostTimeout == 0 || p.boostWorkers == 0 || p.maxNumberOfWorkers == 0) |
|
} |
|
|
|
// zeroBoost will add a temporary boost worker for a no worker queue |
|
// p.lock must be locked at the start of this function BUT it will be unlocked by the end of this function |
|
// (This is because addWorkers has to be called whilst unlocked) |
|
func (p *WorkerPool) zeroBoost() { |
|
ctx, cancel := context.WithTimeout(p.baseCtx, p.boostTimeout) |
|
mq := GetManager().GetManagedQueue(p.qid) |
|
boost := p.boostWorkers |
|
if (boost+p.numberOfWorkers) > p.maxNumberOfWorkers && p.maxNumberOfWorkers >= 0 { |
|
boost = p.maxNumberOfWorkers - p.numberOfWorkers |
|
} |
|
if mq != nil { |
|
log.Debug("WorkerPool: %d (for %s) has zero workers - adding %d temporary workers for %s", p.qid, mq.Name, boost, p.boostTimeout) |
|
|
|
start := time.Now() |
|
pid := mq.RegisterWorkers(boost, start, true, start.Add(p.boostTimeout), cancel, false) |
|
cancel = func() { |
|
mq.RemoveWorkers(pid) |
|
} |
|
} else { |
|
log.Debug("WorkerPool: %d has zero workers - adding %d temporary workers for %s", p.qid, p.boostWorkers, p.boostTimeout) |
|
} |
|
p.lock.Unlock() |
|
p.addWorkers(ctx, cancel, boost) |
|
} |
|
|
|
func (p *WorkerPool) pushBoost(data Data) { |
|
select { |
|
case p.dataChan <- data: |
|
default: |
|
p.lock.Lock() |
|
if p.blockTimeout <= 0 { |
|
p.lock.Unlock() |
|
p.dataChan <- data |
|
return |
|
} |
|
ourTimeout := p.blockTimeout |
|
timer := time.NewTimer(p.blockTimeout) |
|
p.lock.Unlock() |
|
select { |
|
case p.dataChan <- data: |
|
util.StopTimer(timer) |
|
case <-timer.C: |
|
p.lock.Lock() |
|
if p.blockTimeout > ourTimeout || (p.numberOfWorkers > p.maxNumberOfWorkers && p.maxNumberOfWorkers >= 0) { |
|
p.lock.Unlock() |
|
p.dataChan <- data |
|
return |
|
} |
|
p.blockTimeout *= 2 |
|
boostCtx, boostCtxCancel := context.WithCancel(p.baseCtx) |
|
mq := GetManager().GetManagedQueue(p.qid) |
|
boost := p.boostWorkers |
|
if (boost+p.numberOfWorkers) > p.maxNumberOfWorkers && p.maxNumberOfWorkers >= 0 { |
|
boost = p.maxNumberOfWorkers - p.numberOfWorkers |
|
} |
|
if mq != nil { |
|
log.Debug("WorkerPool: %d (for %s) Channel blocked for %v - adding %d temporary workers for %s, block timeout now %v", p.qid, mq.Name, ourTimeout, boost, p.boostTimeout, p.blockTimeout) |
|
|
|
start := time.Now() |
|
pid := mq.RegisterWorkers(boost, start, true, start.Add(p.boostTimeout), boostCtxCancel, false) |
|
go func() { |
|
<-boostCtx.Done() |
|
mq.RemoveWorkers(pid) |
|
boostCtxCancel() |
|
}() |
|
} else { |
|
log.Debug("WorkerPool: %d Channel blocked for %v - adding %d temporary workers for %s, block timeout now %v", p.qid, ourTimeout, p.boostWorkers, p.boostTimeout, p.blockTimeout) |
|
} |
|
go func() { |
|
<-time.After(p.boostTimeout) |
|
boostCtxCancel() |
|
p.lock.Lock() |
|
p.blockTimeout /= 2 |
|
p.lock.Unlock() |
|
}() |
|
p.lock.Unlock() |
|
p.addWorkers(boostCtx, boostCtxCancel, boost) |
|
p.dataChan <- data |
|
} |
|
} |
|
} |
|
|
|
// NumberOfWorkers returns the number of current workers in the pool |
|
func (p *WorkerPool) NumberOfWorkers() int { |
|
p.lock.Lock() |
|
defer p.lock.Unlock() |
|
return p.numberOfWorkers |
|
} |
|
|
|
// NumberInQueue returns the number of items in the queue |
|
func (p *WorkerPool) NumberInQueue() int64 { |
|
return atomic.LoadInt64(&p.numInQueue) |
|
} |
|
|
|
// MaxNumberOfWorkers returns the maximum number of workers automatically added to the pool |
|
func (p *WorkerPool) MaxNumberOfWorkers() int { |
|
p.lock.Lock() |
|
defer p.lock.Unlock() |
|
return p.maxNumberOfWorkers |
|
} |
|
|
|
// BoostWorkers returns the number of workers for a boost |
|
func (p *WorkerPool) BoostWorkers() int { |
|
p.lock.Lock() |
|
defer p.lock.Unlock() |
|
return p.boostWorkers |
|
} |
|
|
|
// BoostTimeout returns the timeout of the next boost |
|
func (p *WorkerPool) BoostTimeout() time.Duration { |
|
p.lock.Lock() |
|
defer p.lock.Unlock() |
|
return p.boostTimeout |
|
} |
|
|
|
// BlockTimeout returns the timeout til the next boost |
|
func (p *WorkerPool) BlockTimeout() time.Duration { |
|
p.lock.Lock() |
|
defer p.lock.Unlock() |
|
return p.blockTimeout |
|
} |
|
|
|
// SetPoolSettings sets the setable boost values |
|
func (p *WorkerPool) SetPoolSettings(maxNumberOfWorkers, boostWorkers int, timeout time.Duration) { |
|
p.lock.Lock() |
|
defer p.lock.Unlock() |
|
p.maxNumberOfWorkers = maxNumberOfWorkers |
|
p.boostWorkers = boostWorkers |
|
p.boostTimeout = timeout |
|
} |
|
|
|
// SetMaxNumberOfWorkers sets the maximum number of workers automatically added to the pool |
|
// Changing this number will not change the number of current workers but will change the limit |
|
// for future additions |
|
func (p *WorkerPool) SetMaxNumberOfWorkers(newMax int) { |
|
p.lock.Lock() |
|
defer p.lock.Unlock() |
|
p.maxNumberOfWorkers = newMax |
|
} |
|
|
|
func (p *WorkerPool) commonRegisterWorkers(number int, timeout time.Duration, isFlusher bool) (context.Context, context.CancelFunc) { |
|
var ctx context.Context |
|
var cancel context.CancelFunc |
|
start := time.Now() |
|
end := start |
|
hasTimeout := false |
|
if timeout > 0 { |
|
ctx, cancel = context.WithTimeout(p.baseCtx, timeout) |
|
end = start.Add(timeout) |
|
hasTimeout = true |
|
} else { |
|
ctx, cancel = context.WithCancel(p.baseCtx) |
|
} |
|
|
|
mq := GetManager().GetManagedQueue(p.qid) |
|
if mq != nil { |
|
pid := mq.RegisterWorkers(number, start, hasTimeout, end, cancel, isFlusher) |
|
log.Trace("WorkerPool: %d (for %s) adding %d workers with group id: %d", p.qid, mq.Name, number, pid) |
|
return ctx, func() { |
|
mq.RemoveWorkers(pid) |
|
} |
|
} |
|
log.Trace("WorkerPool: %d adding %d workers (no group id)", p.qid, number) |
|
|
|
return ctx, cancel |
|
} |
|
|
|
// AddWorkers adds workers to the pool - this allows the number of workers to go above the limit |
|
func (p *WorkerPool) AddWorkers(number int, timeout time.Duration) context.CancelFunc { |
|
ctx, cancel := p.commonRegisterWorkers(number, timeout, false) |
|
p.addWorkers(ctx, cancel, number) |
|
return cancel |
|
} |
|
|
|
// addWorkers adds workers to the pool |
|
func (p *WorkerPool) addWorkers(ctx context.Context, cancel context.CancelFunc, number int) { |
|
for i := 0; i < number; i++ { |
|
p.lock.Lock() |
|
if p.cond == nil { |
|
p.cond = sync.NewCond(&p.lock) |
|
} |
|
p.numberOfWorkers++ |
|
p.lock.Unlock() |
|
go func() { |
|
pprof.SetGoroutineLabels(ctx) |
|
p.doWork(ctx) |
|
|
|
p.lock.Lock() |
|
p.numberOfWorkers-- |
|
if p.numberOfWorkers == 0 { |
|
p.cond.Broadcast() |
|
cancel() |
|
} else if p.numberOfWorkers < 0 { |
|
// numberOfWorkers can't go negative but... |
|
log.Warn("Number of Workers < 0 for QID %d - this shouldn't happen", p.qid) |
|
p.numberOfWorkers = 0 |
|
p.cond.Broadcast() |
|
cancel() |
|
} |
|
select { |
|
case <-p.baseCtx.Done(): |
|
// Don't warn or check for ongoing work if the baseCtx is shutdown |
|
case <-p.paused: |
|
// Don't warn or check for ongoing work if the pool is paused |
|
default: |
|
if p.hasNoWorkerScaling() { |
|
log.Warn( |
|
"Queue: %d is configured to be non-scaling and has no workers - this configuration is likely incorrect.\n"+ |
|
"The queue will be paused to prevent data-loss with the assumption that you will add workers and unpause as required.", p.qid) |
|
p.pause() |
|
} else if p.numberOfWorkers == 0 && atomic.LoadInt64(&p.numInQueue) > 0 { |
|
// OK there are no workers but... there's still work to be done -> Reboost |
|
p.zeroBoost() |
|
// p.lock will be unlocked by zeroBoost |
|
return |
|
} |
|
} |
|
p.lock.Unlock() |
|
}() |
|
} |
|
} |
|
|
|
// Wait for WorkerPool to finish |
|
func (p *WorkerPool) Wait() { |
|
p.lock.Lock() |
|
defer p.lock.Unlock() |
|
if p.cond == nil { |
|
p.cond = sync.NewCond(&p.lock) |
|
} |
|
if p.numberOfWorkers <= 0 { |
|
return |
|
} |
|
p.cond.Wait() |
|
} |
|
|
|
// IsPaused returns if the pool is paused |
|
func (p *WorkerPool) IsPaused() bool { |
|
p.lock.Lock() |
|
defer p.lock.Unlock() |
|
select { |
|
case <-p.paused: |
|
return true |
|
default: |
|
return false |
|
} |
|
} |
|
|
|
// IsPausedIsResumed returns if the pool is paused and a channel that is closed when it is resumed |
|
func (p *WorkerPool) IsPausedIsResumed() (<-chan struct{}, <-chan struct{}) { |
|
p.lock.Lock() |
|
defer p.lock.Unlock() |
|
return p.paused, p.resumed |
|
} |
|
|
|
// Pause pauses the WorkerPool |
|
func (p *WorkerPool) Pause() { |
|
p.lock.Lock() |
|
defer p.lock.Unlock() |
|
p.pause() |
|
} |
|
|
|
func (p *WorkerPool) pause() { |
|
select { |
|
case <-p.paused: |
|
default: |
|
p.resumed = make(chan struct{}) |
|
close(p.paused) |
|
} |
|
} |
|
|
|
// Resume resumes the WorkerPool |
|
func (p *WorkerPool) Resume() { |
|
p.lock.Lock() // can't defer unlock because of the zeroBoost at the end |
|
select { |
|
case <-p.resumed: |
|
// already resumed - there's nothing to do |
|
p.lock.Unlock() |
|
return |
|
default: |
|
} |
|
|
|
p.paused = make(chan struct{}) |
|
close(p.resumed) |
|
|
|
// OK now we need to check if we need to add some workers... |
|
if p.numberOfWorkers > 0 || p.hasNoWorkerScaling() || atomic.LoadInt64(&p.numInQueue) == 0 { |
|
// We either have workers, can't scale or there's no work to be done -> so just resume |
|
p.lock.Unlock() |
|
return |
|
} |
|
|
|
// OK we got some work but no workers we need to think about boosting |
|
select { |
|
case <-p.baseCtx.Done(): |
|
// don't bother boosting if the baseCtx is done |
|
p.lock.Unlock() |
|
return |
|
default: |
|
} |
|
|
|
// OK we'd better add some boost workers! |
|
p.zeroBoost() |
|
// p.zeroBoost will unlock the lock |
|
} |
|
|
|
// CleanUp will drain the remaining contents of the channel |
|
// This should be called after AddWorkers context is closed |
|
func (p *WorkerPool) CleanUp(ctx context.Context) { |
|
log.Trace("WorkerPool: %d CleanUp", p.qid) |
|
close(p.dataChan) |
|
for data := range p.dataChan { |
|
if unhandled := p.handle(data); unhandled != nil { |
|
if unhandled != nil { |
|
log.Error("Unhandled Data in clean-up of queue %d", p.qid) |
|
} |
|
} |
|
|
|
atomic.AddInt64(&p.numInQueue, -1) |
|
select { |
|
case <-ctx.Done(): |
|
log.Warn("WorkerPool: %d Cleanup context closed before finishing clean-up", p.qid) |
|
return |
|
default: |
|
} |
|
} |
|
log.Trace("WorkerPool: %d CleanUp Done", p.qid) |
|
} |
|
|
|
// Flush flushes the channel with a timeout - the Flush worker will be registered as a flush worker with the manager |
|
func (p *WorkerPool) Flush(timeout time.Duration) error { |
|
ctx, cancel := p.commonRegisterWorkers(1, timeout, true) |
|
defer cancel() |
|
return p.FlushWithContext(ctx) |
|
} |
|
|
|
// IsEmpty returns if true if the worker queue is empty |
|
func (p *WorkerPool) IsEmpty() bool { |
|
return atomic.LoadInt64(&p.numInQueue) == 0 |
|
} |
|
|
|
// FlushWithContext is very similar to CleanUp but it will return as soon as the dataChan is empty |
|
// NB: The worker will not be registered with the manager. |
|
func (p *WorkerPool) FlushWithContext(ctx context.Context) error { |
|
log.Trace("WorkerPool: %d Flush", p.qid) |
|
for { |
|
select { |
|
case data := <-p.dataChan: |
|
if unhandled := p.handle(data); unhandled != nil { |
|
log.Error("Unhandled Data whilst flushing queue %d", p.qid) |
|
} |
|
atomic.AddInt64(&p.numInQueue, -1) |
|
case <-p.baseCtx.Done(): |
|
return p.baseCtx.Err() |
|
case <-ctx.Done(): |
|
return ctx.Err() |
|
default: |
|
return nil |
|
} |
|
} |
|
} |
|
|
|
func (p *WorkerPool) doWork(ctx context.Context) { |
|
pprof.SetGoroutineLabels(ctx) |
|
delay := time.Millisecond * 300 |
|
|
|
// Create a common timer - we will use this elsewhere |
|
timer := time.NewTimer(0) |
|
util.StopTimer(timer) |
|
|
|
paused, _ := p.IsPausedIsResumed() |
|
data := make([]Data, 0, p.batchLength) |
|
for { |
|
select { |
|
case <-paused: |
|
log.Trace("Worker for Queue %d Pausing", p.qid) |
|
if len(data) > 0 { |
|
log.Trace("Handling: %d data, %v", len(data), data) |
|
if unhandled := p.handle(data...); unhandled != nil { |
|
log.Error("Unhandled Data in queue %d", p.qid) |
|
} |
|
atomic.AddInt64(&p.numInQueue, -1*int64(len(data))) |
|
} |
|
_, resumed := p.IsPausedIsResumed() |
|
select { |
|
case <-resumed: |
|
paused, _ = p.IsPausedIsResumed() |
|
log.Trace("Worker for Queue %d Resuming", p.qid) |
|
util.StopTimer(timer) |
|
case <-ctx.Done(): |
|
log.Trace("Worker shutting down") |
|
return |
|
} |
|
default: |
|
} |
|
select { |
|
case <-paused: |
|
// go back around |
|
case <-ctx.Done(): |
|
if len(data) > 0 { |
|
log.Trace("Handling: %d data, %v", len(data), data) |
|
if unhandled := p.handle(data...); unhandled != nil { |
|
log.Error("Unhandled Data in queue %d", p.qid) |
|
} |
|
atomic.AddInt64(&p.numInQueue, -1*int64(len(data))) |
|
} |
|
log.Trace("Worker shutting down") |
|
return |
|
case datum, ok := <-p.dataChan: |
|
if !ok { |
|
// the dataChan has been closed - we should finish up: |
|
if len(data) > 0 { |
|
log.Trace("Handling: %d data, %v", len(data), data) |
|
if unhandled := p.handle(data...); unhandled != nil { |
|
log.Error("Unhandled Data in queue %d", p.qid) |
|
} |
|
atomic.AddInt64(&p.numInQueue, -1*int64(len(data))) |
|
} |
|
log.Trace("Worker shutting down") |
|
return |
|
} |
|
data = append(data, datum) |
|
util.StopTimer(timer) |
|
|
|
if len(data) >= p.batchLength { |
|
log.Trace("Handling: %d data, %v", len(data), data) |
|
if unhandled := p.handle(data...); unhandled != nil { |
|
log.Error("Unhandled Data in queue %d", p.qid) |
|
} |
|
atomic.AddInt64(&p.numInQueue, -1*int64(len(data))) |
|
data = make([]Data, 0, p.batchLength) |
|
} else { |
|
timer.Reset(delay) |
|
} |
|
case <-timer.C: |
|
delay = time.Millisecond * 100 |
|
if len(data) > 0 { |
|
log.Trace("Handling: %d data, %v", len(data), data) |
|
if unhandled := p.handle(data...); unhandled != nil { |
|
log.Error("Unhandled Data in queue %d", p.qid) |
|
} |
|
atomic.AddInt64(&p.numInQueue, -1*int64(len(data))) |
|
data = make([]Data, 0, p.batchLength) |
|
} |
|
} |
|
} |
|
}
|
|
|