ODF OSD goes into CrashLoopBackOff due to the following error: "bluefs enospc"
Environment
- Red Hat OpenShift Data Foundation (RHODF) 4
- Red Hat OpenShift Container Storage (RHOCS) 4
Issue
OSD pod goes into CrashLoopBackOff due to the following error: ceph_abort_msg("bluefs enospc")
Here is an example from the OSD pod logs:
# oc logs rook-ceph-osd-2-6b886d585c-gdjb8 -c osd
debug 2023-03-17T11:27:02.869+0000 7f0d50d3f080 4 rocksdb: [db_impl/db_impl_open.cc:760] Recovering log #87235 mode 2
debug 2023-03-17T11:27:03.746+0000 7f0d50d3f080 4 rocksdb: [db_impl/db_impl_open.cc:760] Recovering log #87237 mode 2
debug 2023-03-17T11:27:04.348+0000 7f0d50d3f080 3 rocksdb: [le/block_based/filter_policy.cc:584] Using legacy Bloom filter with high (20) bits/key. Dramatic filter space and/or accuracy improvement is available with format_version>=5.
debug 2023-03-17T11:27:04.594+0000 7f0d50d3f080 1 bluefs _allocate unable to allocate 0x90000 on bdev 1, allocator name block, allocator type hybrid, capacity 0x3e80000000, block size 0x1000, free 0xdb1786000, fragmentation 0.519235, allocated 0x0
debug 2023-03-17T11:27:04.594+0000 7f0d50d3f080 -1 bluefs _allocate allocation failed, needed 0x80432
debug 2023-03-17T11:27:04.594+0000 7f0d50d3f080 -1 bluefs _flush_range allocated: 0x0 offset: 0x0 length: 0x80432
/builddir/build/BUILD/ceph-16.2.7/src/os/bluestore/BlueFS.cc: In function 'int BlueFS::_flush_range(BlueFS::FileWriter*, uint64_t, uint64_t)' thread 7f0d50d3f080 time 2023-03-17T11:27:04.595371+0000
/builddir/build/BUILD/ceph-16.2.7/src/os/bluestore/BlueFS.cc: 2768: ceph_abort_msg("bluefs enospc")
ceph version 16.2.7-126.el8cp (fe0af61d104d48cb9d116cde6e593b5fc8c197e4) pacific (stable)
1: (ceph::__ceph_abort(char const*, int, char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0xe5) [0x562f2f19b562]
2: (BlueFS::_flush_range(BlueFS::FileWriter*, unsigned long, unsigned long)+0x1131) [0x562f2f8999e1]
3: (BlueFS::_flush(BlueFS::FileWriter*, bool, bool*)+0x90) [0x562f2f899cc0]
4: (BlueFS::_flush(BlueFS::FileWriter*, bool, std::unique_lock<std::mutex>&)+0x32) [0x562f2f8aadd2]
5: (BlueRocksWritableFile::Append(rocksdb::Slice const&)+0x11b) [0x562f2f8c370b]
6: (rocksdb::LegacyWritableFileWrapper::Append(rocksdb::Slice const&, rocksdb::IOOptions const&, rocksdb::IODebugContext*)+0x1f) [0x562f2fd5d89f]
7: (rocksdb::WritableFileWriter::WriteBuffered(char const*, unsigned long)+0x58a) [0x562f2fe6f5ea]
8: (rocksdb::WritableFileWriter::Append(rocksdb::Slice const&)+0x2d0) [0x562f2fe70a40]
9: (rocksdb::BlockBasedTableBuilder::WriteRawBlock(rocksdb::Slice const&, rocksdb::CompressionType, rocksdb::BlockHandle*, bool)+0xb6) [0x562f2ff8c1c6]
10: (rocksdb::BlockBasedTableBuilder::WriteBlock(rocksdb::Slice const&, rocksdb::BlockHandle*, bool)+0x26c) [0x562f2ff8cb0c]
11: (rocksdb::BlockBasedTableBuilder::WriteBlock(rocksdb::BlockBuilder*, rocksdb::BlockHandle*, bool)+0x3c) [0x562f2ff8d20c]
12: (rocksdb::BlockBasedTableBuilder::Flush()+0x6d) [0x562f2ff8d29d]
13: (rocksdb::BlockBasedTableBuilder::Add(rocksdb::Slice const&, rocksdb::Slice const&)+0x2b8) [0x562f2ff90708]
14: (rocksdb::BuildTable(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rocksdb::Env*, rocksdb::FileSystem*, rocksdb::ImmutableCFOptions const&, rocksdb::MutableCFOptions const&, rocksdb::FileOptions const&, rocks
db::TableCache*, rocksdb::InternalIteratorBase<rocksdb::Slice>*, std::vector<std::unique_ptr<rocksdb::FragmentedRangeTombstoneIterator, std::default_delete<rocksdb::FragmentedRangeTombstoneIterator> >, std::allocator<std::unique_ptr<rocksdb::FragmentedRan
geTombstoneIterator, std::default_delete<rocksdb::FragmentedRangeTombstoneIterator> > > >, rocksdb::FileMetaData*, rocksdb::InternalKeyComparator const&, std::vector<std::unique_ptr<rocksdb::IntTblPropCollectorFactory, std::default_delete<rocksdb::IntTblP
ropCollectorFactory> >, std::allocator<std::unique_ptr<rocksdb::IntTblPropCollectorFactory, std::default_delete<rocksdb::IntTblPropCollectorFactory> > > > const*, unsigned int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>
> const&, std::vector<unsigned long, std::allocator<unsigned long> >, unsigned long, rocksdb::SnapshotChecker*, rocksdb::CompressionType, unsigned long, rocksdb::CompressionOptions const&, bool, rocksdb::InternalStats*, rocksdb::TableFileCreationReason, r
ocksdb::EventLogger*, int, rocksdb::Env::IOPriority, rocksdb::TableProperties*, int, unsigned long, unsigned long, rocksdb::Env::WriteLifeTimeHint, unsigned long)+0xa45) [0x562f2ff3b185]
15: (rocksdb::DBImpl::WriteLevel0TableForRecovery(int, rocksdb::ColumnFamilyData*, rocksdb::MemTable*, rocksdb::VersionEdit*)+0xcf5) [0x562f2fda01e5]
16: (rocksdb::DBImpl::RecoverLogFiles(std::vector<unsigned long, std::allocator<unsigned long> > const&, unsigned long*, bool, bool*)+0x1c2e) [0x562f2fda291e]
17: (rocksdb::DBImpl::Recover(std::vector<rocksdb::ColumnFamilyDescriptor, std::allocator<rocksdb::ColumnFamilyDescriptor> > const&, bool, bool, bool, unsigned long*)+0xae8) [0x562f2fda3c78]
18: (rocksdb::DBImpl::Open(rocksdb::DBOptions const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::vector<rocksdb::ColumnFamilyDescriptor, std::allocator<rocksdb::ColumnFamilyDescriptor> > const&, std::vect
or<rocksdb::ColumnFamilyHandle*, std::allocator<rocksdb::ColumnFamilyHandle*> >*, rocksdb::DB**, bool, bool)+0x59d) [0x562f2fd9d99d]
19: (rocksdb::DB::Open(rocksdb::DBOptions const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::vector<rocksdb::ColumnFamilyDescriptor, std::allocator<rocksdb::ColumnFamilyDescriptor> > const&, std::vector<r
ocksdb::ColumnFamilyHandle*, std::allocator<rocksdb::ColumnFamilyHandle*> >*, rocksdb::DB**)+0x15) [0x562f2fd9ed35]
20: (RocksDBStore::do_open(std::ostream&, bool, bool, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0x10c1) [0x562f2fd16c71]
21: (BlueStore::_open_db(bool, bool, bool)+0x8c7) [0x562f2f793bb7]
22: (BlueStore::_open_db_and_around(bool, bool)+0x2f7) [0x562f2f7fe5e7]
23: (BlueStore::_mount()+0x204) [0x562f2f8014a4]
24: (OSD::init()+0x380) [0x562f2f2d35d0]
25: main()
26: __libc_start_main()
27: _start()
*** Caught signal (Aborted) **
in thread 7f0d50d3f080 thread_name:ceph-osd
debug 2023-03-17T11:27:04.605+0000 7f0d50d3f080 -1 /builddir/build/BUILD/ceph-16.2.7/src/os/bluestore/BlueFS.cc: In function 'int BlueFS::_flush_range(BlueFS::FileWriter*, uint64_t, uint64_t)' thread 7f0d50d3f080 time 2023-03-17T11:27:04.595371+0000
/builddir/build/BUILD/ceph-16.2.7/src/os/bluestore/BlueFS.cc: 2768: ceph_abort_msg("bluefs enospc")
Resolution
NOTE
- Step 1 is ONLY needed if you MUST get the OSDs running and the PG states are not active+clean. If this doesn't apply to you, move to step 2 to replace the OSD.
- For this process it is recommended to have two terminal sessions open side by side: one for rook-ceph-tools (ceph commands) and one for the oc client (oc commands).
- Set up the rook-ceph-tools pod if it is not deployed already, then rsh into it:
# oc patch OCSInitialization ocsinit -n openshift-storage --type json --patch '[{ "op": "replace", "path": "/spec/enableCephTools", "value": true }]'
# oc rsh -n openshift-storage $(oc get pods -l app=rook-ceph-tools -o name)
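To confirm the toolbox pod is running before continuing, a quick check from the oc client (a sketch):
# oc get pods -n openshift-storage -l app=rook-ceph-tools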
1) First, bring all OSDs up in Ceph by following these steps:
- Pause all client I/O to prevent reads/writes to Ceph; from the rook-ceph-tools pod:
$ ceph osd set pause
check:
sh-4.4$ ceph -s
cluster:
id: 2e930d15-8718-4131-83d5-bb349bab36ec
health: HEALTH_WARN
1 filesystem is degraded
pauserd,pausewr,nodeep-scrub flag(s) set. <<--- pauserd = pause for read IO and pausewr = for writes
- Scale down ocs and rook-ceph operators so they stop reconciling:
# oc scale deployment rook-ceph-operator ocs-operator --replicas=0 -n openshift-storage
- Set your variables for saving the original deployment and the target OSD:
# starttime=$(date +%F_%H-%M-%S)
# osdid=<the-clbo-osd-id>
- Save the original OSD deployment:
# oc get deployment rook-ceph-osd-${osdid} -o yaml > ${osdid}.${starttime}.yaml
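A quick sanity check that the backup file was written (a sketch using the variables set above):
# ls -l ${osdid}.${starttime}.yaml
# grep -m1 '^kind: Deployment' ${osdid}.${starttime}.yaml
kind: Deployment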
- The next steps need to be performed one at a time; the OSD pod needs to restart after applying each command.
- Remove the liveness and startup probes:
# oc set probe deployment rook-ceph-osd-${osdid} --remove --liveness --startup
# oc get pods -w |grep osd
The OSD pod will restart and show Running 2/2.
- Sleep the container (this way the OSD pod is started and has access to the disk device, but the ceph-osd daemon is NOT started):
# oc patch deployment rook-ceph-osd-${osdid} -n openshift-storage -p '{"spec": {"template": {"spec": {"containers": [{"name": "osd", "command": ["sleep"], "args": ["infinity"]}]}}}}'
# oc get pods -w |grep osd
The OSD pod will restart and show Running 2/2.
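To verify the container entrypoint was replaced before proceeding, the deployment can be inspected (a sketch; assumes the container is named osd, as above). It should return only the sleep command:
# oc get deployment rook-ceph-osd-${osdid} -o jsonpath='{.spec.template.spec.containers[?(@.name=="osd")].command}{"\n"}'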
- Edit the OSD deployment and add memory for the bluefs process (this step may not be needed; in other cases it also worked with the default value of 5Gi, depending on the amount of data).
Edit the OSD deployment to set 12Gi of memory under the osd container; this will apply the memory parameter change to all containers within the pod:
# oc edit deployment rook-ceph-osd-${osdid}
      containers:
      - args:
        name: osd
        resources:
          limits:
            cpu: "2"
            memory: 5Gi    <<-- change to 12Gi
          requests:
            cpu: "2"
            memory: 5Gi    <<-- change to 12Gi
# oc get pods -w |grep osd
The OSD pod will restart and show Running 2/2.
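Alternatively, the same memory change can be made non-interactively with oc set resources (a sketch, assuming the container is named osd and the 12Gi value discussed above):
# oc set resources deployment rook-ceph-osd-${osdid} -n openshift-storage -c osd --requests=memory=12Gi --limits=memory=12Gi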
- In the rook-ceph-tools pod (for ease of visual parsing, osd.2 is used in this example):
$ ceph config get osd.2 bluefs_shared_alloc_size
65536
- From the oc client, connect to the running OSD pod and run fsck:
# oc rsh rook-ceph-osd-2-<suffix-id>
sh-4.4# ceph-bluestore-tool fsck --path /var/lib/ceph/osd/ceph-2
Note: if this fails, you may need to run it with the --bluefs_shared_alloc_size=16384 option:
sh-4.4# ceph-bluestore-tool fsck --path /var/lib/ceph/osd/ceph-2 --bluefs_shared_alloc_size=16384
- In the rook-ceph-tools pod, set the allocation size to 16K:
$ ceph config set osd.2 bluefs_shared_alloc_size 16384
check with:
$ ceph config get osd.2 bluefs_shared_alloc_size
$ ceph config dump
- Now we need to bring the OSD up in Ceph with this new value of 16K. For that, restore the original deployment we saved above; this will trigger the deletion of the OSD pod and start a new OSD pod with the OSD daemon running. Example:
# oc replace -f 2.2023-03-10_03-42-40.yaml --force
NOTE1. In case the pod fails to start, see:
ODF 4.X - OSD pod Init:CrashLoopBackOff due to initContainer expand-bluefs - BlueStore::expand_devices
- Watch the Ceph status from rook-ceph-tools:
$ ceph -s -w
$ ceph osd df tree
- Delete the mgr pod to reflect accurate PG stats (this step may not be required).
From the oc client:
# oc delete pod rook-ceph-mgr-a-< suffix-id >
or from the ceph-tool-box:
$ ceph mgr fail
- The targeted OSD should now show up and in. Wait for recovery; from the ceph-tool-box, check:
$ ceph -s -w
$ ceph osd df tree
- If there are other OSDs with the same issue, you can repeat the above steps with each of them.
- When you conclude, you should have:
All OSDs up+in (ceph osd tree)
All PGs active+clean
ceph status OK
- At this point you can unpause / resume client I/O to Ceph, from the rook-ceph-tools pod:
$ ceph osd unpause
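To confirm the pauserd and pausewr flags are cleared (a sketch), from the rook-ceph-tools pod:
$ ceph osd dump | grep flags
$ ceph -s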
- Scale up ocs and rook-ceph operators:
# oc scale deployment rook-ceph-operator ocs-operator --replicas=1 -n openshift-storage
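To confirm both operators are running again (a sketch):
# oc get deployment rook-ceph-operator ocs-operator -n openshift-storage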
- Also try to remove as much data as possible from the application side; you have most probably run into this issue because you ran out of space (or of free 64K blocks). Alternatively, consider adding more capacity to your cluster: Scaling Storage
NOTE2. We cannot leave the OSD running indefinitely with the new bluefs_shared_alloc_size (the device would contain a mixture of data allocated in 64K blocks and other data allocated in 16K blocks). If the OSD pod is restarted, it will fail to start with the error described above in NOTE1. For that reason, we need to delete the OSD and recreate / replace it as explained in the next step 2).
2) Finally, we need to revert to the original bluefs_shared_alloc_size value of 64K. That requires deleting the OSD from the ODF/Ceph configuration and recreating/starting a new OSD on a new (or empty/zapped) disk, so all data is rebalanced and rewritten from scratch in 64K blocks. The steps are:
- From the oc client, scale down osd.2:
# oc scale deployment rook-ceph-osd-2 --replicas=0
- Remove the bluefs_shared_alloc_size set before, from the ceph-tool-box:
$ ceph config rm osd.2 bluefs_shared_alloc_size
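After removing the override, ceph config get should report the default value again (a sketch):
$ ceph config get osd.2 bluefs_shared_alloc_size
65536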
- Replace / re-deploy osd.2 with a new or zapped/wiped disk, following the steps from the ODF documentation
Replacing devices; depending on your platform you may have different instructions.
NOTE3. AWS platform instructions are not valid; you can follow similar steps as described for the VMware platform,
or follow this other generic document:
Steps to remove unwanted or failed Ceph OSD in Red Hat OpenShift Data Foundation(previously known as Red Hat OpenShift Container Storage)
NOTE4. You can wipe out the disk (this will be required when using local storage, since the disk device will be the same).
From the OCS node where the disk device is present (in this example it was device /dev/sdb):
sh-4.4# dd if=/dev/zero of=/dev/sdb bs=1M count=10 conv=fsync
10+0 records in
10+0 records out
10485760 bytes (10 MB, 10 MiB) copied, 0.025526 s, 411 MB/s
sh-4.4#
- Monitor for recovery to complete; from the ceph-tool-box, check:
$ ceph -s
$ ceph osd df tree
- If there are other OSDs that had to be started with bluefs_shared_alloc_size set to 16K, repeat the above steps for one OSD at a time.
Root Cause
- In ODF clusters where the customer has the minimum required OSDs to stand up a cluster (3), two of the OSDs enter CrashLoopBackOff (CLBO) with the error "bluefs enospc" in the OSD logs, and two OSDs show down in the ceph osd tree. This has been seen primarily during upgrades from 4.9 to 4.10, but it can happen in other scenarios, e.g. when the application uses many small files (e.g. 1K), which causes fragmentation. The steps in this KCS are targeted at that scenario. If a single OSD is down, simply Replace the OSD following our documentation.
- Due to high fragmentation, the bluefs allocator is unable to find 64K (minimum allocation size) blocks / contiguous chunks on the OSD disk to allocate new data.
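To illustrate with the figures from the example log above (a sketch; any shell can decode the hex values, and these numbers come from the sample log, not from your cluster):
$ printf '%d\n' 0x3e80000000    # capacity: 268435456000 bytes (250 GiB)
268435456000
$ printf '%d\n' 0xdb1786000     # free: 58812030976 bytes (~54.8 GiB)
58812030976
$ printf '%d\n' 0x90000         # requested bluefs allocation: 589824 bytes (576 KiB)
589824
Even with ~55 GiB reported free on the 250 GiB device, the 576 KiB bluefs write fails because the allocator cannot find enough free 64K allocation units (fragmentation 0.519235 in the log).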
- This was addressed in Bug 2141110 - [RFE] Improve handling of BlueFS ENOSPC. Fixed on Red Hat Ceph Storage 6.1 == 17.2.6-70.el9cp: [Red Hat Ceph Storage 6.1 security and bug fix update - ERRATA RHSA-2023:3623](https://access.redhat.com/errata/RHSA-2023:3623)
- ODF 4.13.2 is based on ceph version 17.2.6-100.el9cp, so it contains the fix, which consists of the following (from Bug 2141110 #c24):
"... the RHCS 6.1 version already having the fix for the handling will not break even when bluefs_shared_alloc_size is left as 65536 because it will automatically fallback to 4K, it will be trying to use bluefs_shared_alloc_size, but it it fails it will use bluestore_alloc_size(default value 4096), it will be using it for some time. After some time (few seconds) it will try again to use bluefs_shared_alloc_size."
Diagnostic Steps
- If you are hitting this issue, collect the following data with ceph-bluestore-tool from the OSD pod affected by the issue; the "--allocator block free-dump" output (big in size) is especially important. Refer to KCS How to run ceph-bluestore-tool (CBT) in an OCS 4.X / ODF environment.
# ceph-bluestore-tool --path /var/lib/ceph/osd/ceph-<osdid>/ bluefs-stats
# ceph-bluestore-tool --path /var/lib/ceph/osd/ceph-<osdid>/ bluefs-bdev-sizes
# ceph-bluestore-tool --path /var/lib/ceph/osd/ceph-<osdid>/ --allocator block free-score
# ceph-bluestore-tool --path /var/lib/ceph/osd/ceph-<osdid>/ --allocator block free-dump > osdX-free-dump.txt
OR collect the output directly on the bastion under /tmp, for example:
oc exec rook-ceph-osd-1-xxxx -c osd -- ceph-bluestore-tool --path /var/lib/ceph/osd/ceph-1/ --allocator block free-dump > /tmp/osd1-free-dump.txt
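For example, to collect a couple of these outputs for a given OSD from the bastion in one go (a sketch; osdid=2 is a hypothetical example, and the ceph-osd-id pod label is an assumption based on how Rook labels OSD pods):
# osdid=2
# pod=$(oc get pods -n openshift-storage -l ceph-osd-id=${osdid} -o name)
# oc exec -n openshift-storage ${pod} -c osd -- ceph-bluestore-tool --path /var/lib/ceph/osd/ceph-${osdid}/ bluefs-stats > /tmp/osd${osdid}-bluefs-stats.txt
# oc exec -n openshift-storage ${pod} -c osd -- ceph-bluestore-tool --path /var/lib/ceph/osd/ceph-${osdid}/ --allocator block free-dump > /tmp/osd${osdid}-free-dump.txt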
- OSD log snippet:
debug -9> 2023-01-26T21:07:27.980+0000 7f063df56080 4 rocksdb: EVENT_LOG_v1 {"time_micros": 1674767247980746, "job": 1, "event": "recovery_started", "log_files": [168629, 168637, 168639]}
debug -8> 2023-01-26T21:07:27.980+0000 7f063df56080 4 rocksdb: [db_impl/db_impl_open.cc:760] Recovering log #168629 mode 2
debug -7> 2023-01-26T21:07:29.733+0000 7f063df56080 4 rocksdb: [db_impl/db_impl_open.cc:760] Recovering log #168637 mode 2
debug -6> 2023-01-26T21:07:31.562+0000 7f063df56080 4 rocksdb: [db_impl/db_impl_open.cc:760] Recovering log #168639 mode 2
debug -5> 2023-01-26T21:07:31.589+0000 7f063df56080 3 rocksdb: [le/block_based/filter_policy.cc:584] Using legacy Bloom filter with high (20) bits/key. Dramatic filter space and/or accuracy improvement is available with format_version>=5.
debug -4> 2023-01-26T21:07:32.752+0000 7f063df56080 1 bluefs _allocate unable to allocate 0x80000 on bdev 1, allocator name block, allocator type hybrid, capacity 0x20000000000, block size 0x1000, free 0x4eb21f1000, fragmentation 0.900168, allocated 0x0
debug -3> 2023-01-26T21:07:32.752+0000 7f063df56080 -1 bluefs _allocate allocation failed, needed 0x70d06
debug -2> 2023-01-26T21:07:32.752+0000 7f063df56080 -1 bluefs _flush_range allocated: 0x90000 offset: 0x807bd length: 0x80549
debug -1> 2023-01-26T21:07:32.761+0000 7f063df56080 -1 /builddir/build/BUILD/ceph-16.2.7/src/os/bluestore/BlueFS.cc: In function 'int BlueFS::_flush_range(BlueFS::FileWriter*, uint64_t, uint64_t)' thread 7f063df56080 time 2023-01-26T21:07:32.752645+0000
/builddir/build/BUILD/ceph-16.2.7/src/os/bluestore/BlueFS.cc: 2768: ceph_abort_msg("bluefs enospc")
Note: bluefs _allocate allocation failed or ceph_abort_msg("bluefs enospc")
This issue has also presented with:
- bluefs _allocate allocation failed and BlueFS.cc: 2352: FAILED ceph_assert(r == 0)
Example of the error:
2023-02-28T12:15:51.708752402Z ceph version 16.2.7-98.el8cp (b20d33c3b301e005bed203d3cad7245da3549f80) pacific (stable)
2023-02-28T12:15:51.708752402Z 1: /lib64/libpthread.so.0(+0x12c20) [0x7f67110a6c20]
2023-02-28T12:15:51.708752402Z 2: gsignal()
2023-02-28T12:15:51.708752402Z 3: abort()
2023-02-28T12:15:51.708752402Z 4: (ceph::__ceph_abort(char const*, int, char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0x1b6) [0x55591687864d]
2023-02-28T12:15:51.708752402Z 5: (BlueFS::_flush_range(BlueFS::FileWriter*, unsigned long, unsigned long)+0x1131) [0x555916f750a1]
2023-02-28T12:15:51.708752402Z 6: (BlueFS::_flush(BlueFS::FileWriter*, bool, bool*)+0x90) [0x555916f75380]
2023-02-28T12:15:51.708752402Z 7: (BlueFS::_flush(BlueFS::FileWriter*, bool, std::unique_lock<std::mutex>&)+0x32) [0x555916f86492]
2023-02-28T12:15:51.708752402Z 8: (BlueRocksWritableFile::Append(rocksdb::Slice const&)+0x11b) [0x555916f9edcb]
2023-02-28T12:15:51.708752402Z 9: (rocksdb::LegacyWritableFileWrapper::Append(rocksdb::Slice const&, rocksdb::IOOptions const&, rocksdb::IODebugContext*)+0x1f) [0x555917438d8f]
2023-02-28T12:15:51.708752402Z 10: (rocksdb::WritableFileWriter::WriteBuffered(char const*, unsigned long)+0x58a) [0x55591754aada]
2023-02-28T12:15:51.708752402Z 11: (rocksdb::WritableFileWriter::Append(rocksdb::Slice const&)+0x2d0) [0x55591754bf30]
2023-02-28T12:15:51.708752402Z 12: (rocksdb::BlockBasedTableBuilder::WriteRawBlock(rocksdb::Slice const&, rocksdb::CompressionType, rocksdb::BlockHandle*, bool)+0xb6) [0x5559176676b6]
2023-02-28T12:15:51.708752402Z 13: (rocksdb::BlockBasedTableBuilder::WriteBlock(rocksdb::Slice const&, rocksdb::BlockHandle*, bool)+0x26c) [0x555917667ffc]
2023-02-28T12:15:51.708752402Z 14: (rocksdb::BlockBasedTableBuilder::WriteBlock(rocksdb::BlockBuilder*, rocksdb::BlockHandle*, bool)+0x3c) [0x5559176686fc]
2023-02-28T12:15:51.708752402Z 15: (rocksdb::BlockBasedTableBuilder::Flush()+0x6d) [0x55591766878d]
2023-02-28T12:15:51.708752402Z 16: (rocksdb::BlockBasedTableBuilder::Add(rocksdb::Slice const&, rocksdb::Slice const&)+0x2b8) [0x55591766bbf8]
2023-02-28T12:15:51.708752402Z 17: (rocksdb::BuildTable(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rocksdb::Env*, rocksdb::FileSystem*, rocksdb::ImmutableCFOptions const&, rocksdb::MutableCFOptions const&, rocksdb::FileOptions const&, rocksdb::TableCache*, rocksdb::InternalIteratorBase<rocksdb::Slice>*, std::vector<std::unique_ptr<rocksdb::FragmentedRangeTombstoneIterator, std::default_delete<rocksdb::FragmentedRangeTombstoneIterator> >, std::allocator<std::unique_ptr<rocksdb::FragmentedRangeTombstoneIterator, std::default_delete<rocksdb::FragmentedRangeTombstoneIterator> > > >, rocksdb::FileMetaData*, rocksdb::InternalKeyComparator const&, std::vector<std::unique_ptr<rocksdb::IntTblPropCollectorFactory, std::default_delete<rocksdb::IntTblPropCollectorFactory> >, std::allocator<std::unique_ptr<rocksdb::IntTblPropCollectorFactory, std::default_delete<rocksdb::IntTblPropCollectorFactory> > > > const*, unsigned int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::vector<unsigned long, std::allocator<unsigned long> >, unsigned long, rocksdb::SnapshotChecker*, rocksdb::CompressionType, unsigned long, rocksdb::CompressionOptions const&, bool, rocksdb::InternalStats*, rocksdb::TableFileCreationReason, rocksdb::EventLogger*, int, rocksdb::Env::IOPriority, rocksdb::TableProperties*, int, unsigned long, unsigned long, rocksdb::Env::WriteLifeTimeHint, unsigned long)+0xa45) [0x555917616675]
2023-02-28T12:15:51.708752402Z 18: (rocksdb::DBImpl::WriteLevel0TableForRecovery(int, rocksdb::ColumnFamilyData*, rocksdb::MemTable*, rocksdb::VersionEdit*)+0xcf5) [0x55591747b6d5]
2023-02-28T12:15:51.708752402Z 19: (rocksdb::DBImpl::RecoverLogFiles(std::vector<unsigned long, std::allocator<unsigned long> > const&, unsigned long*, bool, bool*)+0x1c2e) [0x55591747de0e]
2023-02-28T12:15:51.708752402Z 20: (rocksdb::DBImpl::Recover(std::vector<rocksdb::ColumnFamilyDescriptor, std::allocator<rocksdb::ColumnFamilyDescriptor> > const&, bool, bool, bool, unsigned long*)+0xae8) [0x55591747f168]
2023-02-28T12:15:51.708752402Z 21: (rocksdb::DBImpl::Open(rocksdb::DBOptions const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::vector<rocksdb::ColumnFamilyDescriptor, std::allocator<rocksdb::ColumnFamilyDescriptor> > const&, std::vector<rocksdb::ColumnFamilyHandle*, std::allocator<rocksdb::ColumnFamilyHandle*> >*, rocksdb::DB**, bool, bool)+0x59d) [0x555917478e8d]
2023-02-28T12:15:51.708752402Z 22: (rocksdb::DB::Open(rocksdb::DBOptions const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::vector<rocksdb::ColumnFamilyDescriptor, std::allocator<rocksdb::ColumnFamilyDescriptor> > const&, std::vector<rocksdb::ColumnFamilyHandle*, std::allocator<rocksdb::ColumnFamilyHandle*> >*, rocksdb::DB**)+0x15) [0x55591747a225]
2023-02-28T12:15:51.708752402Z 23: (RocksDBStore::do_open(std::ostream&, bool, bool, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0x10c1) [0x5559173f2161]
2023-02-28T12:15:51.708752402Z 24: (BlueStore::_open_db(bool, bool, bool)+0x8c7) [0x555916e6f217]
2023-02-28T12:15:51.708752402Z 25: (BlueStore::_open_db_and_around(bool, bool)+0x2f7) [0x555916ed9ca7]
2023-02-28T12:15:51.708752402Z 26: (BlueStore::_mount()+0x204) [0x555916edcb64]
2023-02-28T12:15:51.708752402Z 27: (OSD::init()+0x380) [0x5559169b05e0]
2023-02-28T12:15:51.708752402Z 28: main()
2023-02-28T12:15:51.708752402Z 29: __libc_start_main()
2023-02-28T12:15:51.708752402Z 30: _start()
2023-02-28T12:15:51.708752402Z NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this.
# assert:
"assert_condition": "r == 0",
"assert_file": "/builddir/build/BUILD/ceph-16.2.7/src/os/bluestore/BlueFS.cc",
"assert_func": "void BlueFS::_compact_log_async(std::unique_lock<std::mutex>&)",
"assert_line": 2352,
"assert_msg": "/builddir/build/BUILD/ceph-16.2.7/src/os/bluestore/BlueFS.cc: In function 'void BlueFS::_compact_log_async(std::unique_lock<std::mutex>&)' thread 7f11ceab7080 time 2023-02-28T12:25:51.797064+0000\n/builddir/build/BUILD/ceph-16.2.7/src/os/bluestore/BlueFS.cc: 2352: FAILED ceph_assert(r == 0)\n",
"assert_thread_name": "ceph-osd",
"backtrace": [
"/lib64/libpthread.so.0(+0x12c20) [0x7f11cca5dc20]",
"gsignal()",
"abort()",
"(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1a9) [0x55b50b2bbe8d]",
"ceph-osd(+0x56b056) [0x55b50b2bc056]",
"(BlueFS::_compact_log_async(std::unique_lock<std::mutex>&)+0x19bc) [0x55b50b9ca12c]",
"(BlueFS::_flush(BlueFS::FileWriter*, bool, std::unique_lock<std::mutex>&)+0x67) [0x55b50b9ca4c7]",
"(BlueRocksWritableFile::Append(rocksdb::Slice const&)+0x11b) [0x55b50b9e2dcb]",
"(rocksdb::LegacyWritableFileWrapper::Append(rocksdb::Slice const&, rocksdb::IOOptions const&, rocksdb::IODebugContext*)+0x1f) [0x55b50be7cd8f]",
"(rocksdb::WritableFileWriter::WriteBuffered(char const*, unsigned long)+0x58a) [0x55b50bf8eada]",
"(rocksdb::WritableFileWriter::Append(rocksdb::Slice const&)+0x2d0) [0x55b50bf8ff30]",
"(rocksdb::BlockBasedTableBuilder::WriteRawBlock(rocksdb::Slice const&, rocksdb::CompressionType, rocksdb::BlockHandle*, bool)+0xb6) [0x55b50c0ab6b6]",
"(rocksdb::BlockBasedTableBuilder::WriteBlock(rocksdb::Slice const&, rocksdb::BlockHandle*, bool)+0x26c) [0x55b50c0abffc]",
"(rocksdb::BlockBasedTableBuilder::WriteBlock(rocksdb::BlockBuilder*, rocksdb::BlockHandle*, bool)+0x3c) [0x55b50c0ac6fc]",
"(rocksdb::BlockBasedTableBuilder::Flush()+0x6d) [0x55b50c0ac78d]",
"(rocksdb::BlockBasedTableBuilder::Add(rocksdb::Slice const&, rocksdb::Slice const&)+0x2b8) [0x55b50c0afbf8]",
"(rocksdb::BuildTable(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rocksdb::Env*, rocksdb::FileSystem*, rocksdb::ImmutableCFOptions const&, rocksdb::MutableCFOptions const&, rocksdb::FileOptions const&, rocksdb::TableCache*, rocksdb::InternalIteratorBase<rocksdb::Slice>*, std::vector<std::unique_ptr<rocksdb::FragmentedRangeTombstoneIterator, std::default_delete<rocksdb::FragmentedRangeTombstoneIterator> >, std::allocator<std::unique_ptr<rocksdb::FragmentedRangeTombstoneIterator, std::default_delete<rocksdb::FragmentedRangeTombstoneIterator> > > >, rocksdb::FileMetaData*, rocksdb::InternalKeyComparator const&, std::vector<std::unique_ptr<rocksdb::IntTblPropCollectorFactory, std::default_delete<rocksdb::IntTblPropCollectorFactory> >, std::allocator<std::unique_ptr<rocksdb::IntTblPropCollectorFactory, std::default_delete<rocksdb::IntTblPropCollectorFactory> > > > const*, unsigned int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::vector<unsigned long, std::allocator<unsigned long> >, unsigned long, rocksdb::SnapshotChecker*, rocksdb::CompressionType, unsigned long, rocksdb::CompressionOptions const&, bool, rocksdb::InternalStats*, rocksdb::TableFileCreationReason, rocksdb::EventLogger*, int, rocksdb::Env::IOPriority, rocksdb::TableProperties*, int, unsigned long, unsigned long, rocksdb::Env::WriteLifeTimeHint, unsigned long)+0xa45) [0x55b50c05a675]",
"(rocksdb::DBImpl::WriteLevel0TableForRecovery(int, rocksdb::ColumnFamilyData*, rocksdb::MemTable*, rocksdb::VersionEdit*)+0xcf5) [0x55b50bebf6d5]",
"(rocksdb::DBImpl::RecoverLogFiles(std::vector<unsigned long, std::allocator<unsigned long> > const&, unsigned long*, bool, bool*)+0x1c2e) [0x55b50bec1e0e]",
"(rocksdb::DBImpl::Recover(std::vector<rocksdb::ColumnFamilyDescriptor, std::allocator<rocksdb::ColumnFamilyDescriptor> > const&, bool, bool, bool, unsigned long*)+0xae8) [0x55b50bec3168]",
"(rocksdb::DBImpl::Open(rocksdb::DBOptions const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::vector<rocksdb::ColumnFamilyDescriptor, std::allocator<rocksdb::ColumnFamilyDescriptor> > const&, std::vector<rocksdb::ColumnFamilyHandle*, std::allocator<rocksdb::ColumnFamilyHandle*> >*, rocksdb::DB**, bool, bool)+0x59d) [0x55b50bebce8d]",
"(rocksdb::DB::Open(rocksdb::DBOptions const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::vector<rocksdb::ColumnFamilyDescriptor, std::allocator<rocksdb::ColumnFamilyDescriptor> > const&, std::vector<rocksdb::ColumnFamilyHandle*, std::allocator<rocksdb::ColumnFamilyHandle*> >*, rocksdb::DB**)+0x15) [0x55b50bebe225]",
"(RocksDBStore::do_open(std::ostream&, bool, bool, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0x10c1) [0x55b50be36161]",
"(BlueStore::_open_db(bool, bool, bool)+0x8c7) [0x55b50b8b3217]",
"(BlueStore::_open_db_and_around(bool, bool)+0x2f7) [0x55b50b91dca7]",
"(BlueStore::_mount()+0x204) [0x55b50b920b64]",
"(OSD::init()+0x380) [0x55b50b3f45e0]",
"main()",
"__libc_start_main()",
"_start()"
],
"ceph_version": "16.2.7-98.el8cp",
"crash_id": "2023-02-28T12:25:51.815614Z_38ad11e7-8514-4b95-8b8a-a25624e2140e",
"entity_name": "osd.2",
"os_id": "rhel",
"os_name": "Red Hat Enterprise Linux",
"os_version": "8.5 (Ootpa)",
"os_version_id": "8.5",
"process_name": "ceph-osd",
"stack_sig": "dffbb043e1dd6da2426a59719d178b864fb11fe75498a47757d3522e092561f5",
"timestamp": "2023-02-28T12:25:51.815614Z",
"utsname_hostname": "rook-ceph-osd-2-7b8b5986cb-vpfd6",
"utsname_machine": "x86_64",
"utsname_release": "4.18.0-305.40.2.el8_4.x86_64",
"utsname_sysname": "Linux",
"utsname_version": "#1 SMP Tue Mar 8 14:29:54 EST 2022"