Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
397 lines
8.1 KiB
397 lines
8.1 KiB
package hbase |
|
|
|
import ( |
|
"bytes" |
|
|
|
pb "github.com/golang/protobuf/proto" |
|
"github.com/juju/errors" |
|
"github.com/ngaut/log" |
|
"github.com/pingcap/go-hbase/proto" |
|
) |
|
|
|
// nextKey returns the next key in byte-order. data is treated as a
// big-endian counter and incremented by one; when every byte overflows, the
// result grows by one leading zero byte.
//
// The input slice is never modified: the result is always backed by freshly
// allocated memory, so callers may keep using data after the call.
//
// For example:
//
//	nil       -> [0]
//	[]        -> [0]
//	[0]       -> [1]
//	[1, 2, 3] -> [1, 2, 4]
//	[1, 255]  -> [2, 0]
//	[255]     -> [0, 0]
func nextKey(data []byte) []byte {
	// nil or []byte{}
	if len(data) == 0 {
		return []byte{0}
	}

	// Reserve one extra leading byte (left at 0) so that a full overflow such
	// as [255] -> [0, 0] needs no second allocation.
	buf := make([]byte, len(data)+1)
	next := buf[1:]
	copy(next, data)

	// Increment from the least significant byte and keep carrying leftwards
	// for as long as a byte wraps around to 0.
	for i := len(next) - 1; i >= 0; i-- {
		next[i]++
		if next[i] != 0 {
			return next
		}
	}

	// Every byte wrapped around: the answer is len(data)+1 zero bytes.
	return buf
}
|
|
|
// defaultScanMaxRetries is the retry budget NewScan stores in Scan.maxRetries.
const defaultScanMaxRetries = 3
|
|
|
type Scan struct { |
|
client *client |
|
id uint64 |
|
table []byte |
|
// row key |
|
StartRow []byte |
|
StopRow []byte |
|
families [][]byte |
|
qualifiers [][][]byte |
|
nextStartKey []byte |
|
numCached int |
|
closed bool |
|
location *RegionInfo |
|
server *connection |
|
cache []*ResultRow |
|
attrs map[string][]byte |
|
MaxVersions uint32 |
|
TsRangeFrom uint64 |
|
TsRangeTo uint64 |
|
lastResult *ResultRow |
|
// if region split, set startKey = lastResult.Row, but must skip the first |
|
skipFirst bool |
|
maxRetries int |
|
} |
|
|
|
func NewScan(table []byte, batchSize int, c HBaseClient) *Scan { |
|
if batchSize <= 0 { |
|
batchSize = 100 |
|
} |
|
return &Scan{ |
|
client: c.(*client), |
|
table: table, |
|
nextStartKey: nil, |
|
families: make([][]byte, 0), |
|
qualifiers: make([][][]byte, 0), |
|
numCached: batchSize, |
|
closed: false, |
|
attrs: make(map[string][]byte), |
|
maxRetries: defaultScanMaxRetries, |
|
} |
|
} |
|
|
|
func (s *Scan) Close() error { |
|
if s.closed { |
|
return nil |
|
} |
|
|
|
err := s.closeScan(s.server, s.location, s.id) |
|
if err != nil { |
|
return errors.Trace(err) |
|
} |
|
|
|
s.closed = true |
|
return nil |
|
} |
|
|
|
func (s *Scan) AddColumn(family, qual []byte) { |
|
s.AddFamily(family) |
|
pos := s.posOfFamily(family) |
|
s.qualifiers[pos] = append(s.qualifiers[pos], qual) |
|
} |
|
|
|
func (s *Scan) AddStringColumn(family, qual string) { |
|
s.AddColumn([]byte(family), []byte(qual)) |
|
} |
|
|
|
func (s *Scan) AddFamily(family []byte) { |
|
pos := s.posOfFamily(family) |
|
if pos == -1 { |
|
s.families = append(s.families, family) |
|
s.qualifiers = append(s.qualifiers, make([][]byte, 0)) |
|
} |
|
} |
|
|
|
func (s *Scan) AddStringFamily(family string) { |
|
s.AddFamily([]byte(family)) |
|
} |
|
|
|
func (s *Scan) posOfFamily(family []byte) int { |
|
for p, v := range s.families { |
|
if bytes.Equal(family, v) { |
|
return p |
|
} |
|
} |
|
return -1 |
|
} |
|
|
|
func (s *Scan) AddAttr(name string, val []byte) { |
|
s.attrs[name] = val |
|
} |
|
|
|
func (s *Scan) AddTimeRange(from uint64, to uint64) { |
|
s.TsRangeFrom = from |
|
s.TsRangeTo = to |
|
} |
|
|
|
func (s *Scan) Closed() bool { |
|
return s.closed |
|
} |
|
|
|
func (s *Scan) CreateGetFromScan(row []byte) *Get { |
|
g := NewGet(row) |
|
for i, family := range s.families { |
|
if len(s.qualifiers[i]) > 0 { |
|
for _, qual := range s.qualifiers[i] { |
|
g.AddColumn(family, qual) |
|
} |
|
} else { |
|
g.AddFamily(family) |
|
} |
|
} |
|
return g |
|
} |
|
|
|
func (s *Scan) getData(startKey []byte, retries int) ([]*ResultRow, error) { |
|
server, location, err := s.getServerAndLocation(s.table, startKey) |
|
if err != nil { |
|
return nil, errors.Trace(err) |
|
} |
|
|
|
req := &proto.ScanRequest{ |
|
Region: &proto.RegionSpecifier{ |
|
Type: proto.RegionSpecifier_REGION_NAME.Enum(), |
|
Value: []byte(location.Name), |
|
}, |
|
NumberOfRows: pb.Uint32(uint32(s.numCached)), |
|
Scan: &proto.Scan{}, |
|
} |
|
|
|
// set attributes |
|
var attrs []*proto.NameBytesPair |
|
for k, v := range s.attrs { |
|
p := &proto.NameBytesPair{ |
|
Name: pb.String(k), |
|
Value: v, |
|
} |
|
attrs = append(attrs, p) |
|
} |
|
if len(attrs) > 0 { |
|
req.Scan.Attribute = attrs |
|
} |
|
|
|
if s.id > 0 { |
|
req.ScannerId = pb.Uint64(s.id) |
|
} |
|
req.Scan.StartRow = startKey |
|
if s.StopRow != nil { |
|
req.Scan.StopRow = s.StopRow |
|
} |
|
if s.MaxVersions > 0 { |
|
req.Scan.MaxVersions = &s.MaxVersions |
|
} |
|
if s.TsRangeTo > s.TsRangeFrom { |
|
req.Scan.TimeRange = &proto.TimeRange{ |
|
From: pb.Uint64(s.TsRangeFrom), |
|
To: pb.Uint64(s.TsRangeTo), |
|
} |
|
} |
|
|
|
for i, v := range s.families { |
|
req.Scan.Column = append(req.Scan.Column, &proto.Column{ |
|
Family: v, |
|
Qualifier: s.qualifiers[i], |
|
}) |
|
} |
|
|
|
cl := newCall(req) |
|
err = server.call(cl) |
|
if err != nil { |
|
return nil, errors.Trace(err) |
|
} |
|
|
|
msg := <-cl.responseCh |
|
rs, err := s.processResponse(msg) |
|
if err != nil && (isNotInRegionError(err) || isUnknownScannerError(err)) { |
|
if retries <= s.maxRetries { |
|
// clean this table region cache and try again |
|
s.client.CleanRegionCache(s.table) |
|
// create new scanner and set startRow to lastResult |
|
s.id = 0 |
|
if s.lastResult != nil { |
|
startKey = s.lastResult.Row |
|
s.skipFirst = true |
|
} |
|
s.server = nil |
|
s.location = nil |
|
log.Warnf("Retryint get data for %d time(s)", retries+1) |
|
retrySleep(retries + 1) |
|
return s.getData(startKey, retries+1) |
|
} |
|
} |
|
return rs, nil |
|
} |
|
|
|
func (s *Scan) processResponse(response pb.Message) ([]*ResultRow, error) { |
|
var res *proto.ScanResponse |
|
switch r := response.(type) { |
|
case *proto.ScanResponse: |
|
res = r |
|
case *exception: |
|
return nil, errors.New(r.msg) |
|
default: |
|
return nil, errors.Errorf("Invalid response seen [response: %#v]", response) |
|
} |
|
|
|
// Check whether response is nil. |
|
if res == nil { |
|
return nil, errors.Errorf("Empty response: [table=%s] [StartRow=%q] [StopRow=%q] ", s.table, s.StartRow, s.StopRow) |
|
} |
|
|
|
nextRegion := true |
|
s.nextStartKey = nil |
|
s.id = res.GetScannerId() |
|
|
|
results := res.GetResults() |
|
n := len(results) |
|
|
|
if (n == s.numCached) || |
|
len(s.location.EndKey) == 0 || |
|
(s.StopRow != nil && bytes.Compare(s.location.EndKey, s.StopRow) > 0 && n < s.numCached) || |
|
res.GetMoreResultsInRegion() { |
|
nextRegion = false |
|
} |
|
|
|
var err error |
|
if nextRegion { |
|
s.nextStartKey = s.location.EndKey |
|
err = s.closeScan(s.server, s.location, s.id) |
|
if err != nil { |
|
return nil, errors.Trace(err) |
|
} |
|
s.server = nil |
|
s.location = nil |
|
s.id = 0 |
|
} |
|
|
|
if n == 0 && !nextRegion { |
|
err = s.Close() |
|
if err != nil { |
|
return nil, errors.Trace(err) |
|
} |
|
} |
|
|
|
if s.skipFirst { |
|
results = results[1:] |
|
s.skipFirst = false |
|
n = len(results) |
|
} |
|
|
|
tbr := make([]*ResultRow, n) |
|
for i, v := range results { |
|
if v != nil { |
|
tbr[i] = NewResultRow(v) |
|
} |
|
} |
|
|
|
return tbr, nil |
|
} |
|
|
|
func (s *Scan) nextBatch() int { |
|
startKey := s.nextStartKey |
|
if startKey == nil { |
|
startKey = s.StartRow |
|
} |
|
|
|
// Notice: ignore error here. |
|
// TODO: add error check, now only add a log. |
|
rs, err := s.getData(startKey, 0) |
|
if err != nil { |
|
log.Errorf("scan next batch failed - [startKey=%q], %v", startKey, errors.ErrorStack(err)) |
|
} |
|
|
|
// Current region get 0 data, try switch to next region. |
|
if len(rs) == 0 && len(s.nextStartKey) > 0 { |
|
// TODO: add error check, now only add a log. |
|
rs, err = s.getData(s.nextStartKey, 0) |
|
if err != nil { |
|
log.Errorf("scan next batch failed - [startKey=%q], %v", s.nextStartKey, errors.ErrorStack(err)) |
|
} |
|
} |
|
|
|
s.cache = rs |
|
return len(s.cache) |
|
} |
|
|
|
func (s *Scan) Next() *ResultRow { |
|
if s.closed { |
|
return nil |
|
} |
|
var ret *ResultRow |
|
if len(s.cache) == 0 { |
|
n := s.nextBatch() |
|
// no data returned |
|
if n == 0 { |
|
return nil |
|
} |
|
} |
|
|
|
ret = s.cache[0] |
|
s.lastResult = ret |
|
s.cache = s.cache[1:] |
|
return ret |
|
} |
|
|
|
func (s *Scan) closeScan(server *connection, location *RegionInfo, id uint64) error { |
|
if server == nil || location == nil { |
|
return nil |
|
} |
|
|
|
req := &proto.ScanRequest{ |
|
Region: &proto.RegionSpecifier{ |
|
Type: proto.RegionSpecifier_REGION_NAME.Enum(), |
|
Value: []byte(location.Name), |
|
}, |
|
ScannerId: pb.Uint64(id), |
|
CloseScanner: pb.Bool(true), |
|
} |
|
|
|
cl := newCall(req) |
|
err := server.call(cl) |
|
if err != nil { |
|
return errors.Trace(err) |
|
} |
|
|
|
// TODO: add exception check. |
|
<-cl.responseCh |
|
return nil |
|
} |
|
|
|
func (s *Scan) getServerAndLocation(table, startRow []byte) (*connection, *RegionInfo, error) { |
|
if s.server != nil && s.location != nil { |
|
return s.server, s.location, nil |
|
} |
|
|
|
var err error |
|
s.location, err = s.client.LocateRegion(table, startRow, true) |
|
if err != nil { |
|
return nil, nil, errors.Trace(err) |
|
} |
|
|
|
s.server, err = s.client.getClientConn(s.location.Server) |
|
if err != nil { |
|
return nil, nil, errors.Trace(err) |
|
} |
|
return s.server, s.location, nil |
|
}
|
|
|