11

Optimize `vec::retain` performance by the8472 · Pull Request #91527 · rust-lang...

 2 years ago
source link: https://github.com/rust-lang/rust/pull/91527
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.
neoserver,ios ssh client

Copy link

Contributor

hkratz commented on Dec 4, 2021

Performs similar to what I came up with before seeing that you claimed the issue:

    #[unstable(feature = "vec_retain_mut", issue = "90829")]
    pub fn retain_mut<F>(&mut self, mut f: F)
    where
        F: FnMut(&mut T) -> bool,
    {
        let original_len = self.len();
        // Avoid double drop if the drop guard is not executed,
        // since we may make some holes during the process.
        unsafe { self.set_len(0) };

        // Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked]
        //      |<-              processed len   ->| ^- next to check
        //                  |<-  deleted cnt     ->|
        //      |<-              original_len                          ->|
        // Kept: Elements which predicate returns true on.
        // Hole: Moved or dropped element slot.
        // Unchecked: Unchecked valid elements.
        //
        // This drop guard will be invoked when predicate or `drop` of element panicked.
        // It shifts unchecked elements to cover holes and `set_len` to the correct length.
        // In cases when predicate and `drop` never panick, it will be optimized out.
        struct BackshiftOnDrop<'a, T, A: Allocator> {
            v: &'a mut Vec<T, A>,
            processed_len: usize,
            deleted_cnt: usize,
            original_len: usize,
        }

        impl<T, A: Allocator> Drop for BackshiftOnDrop<'_, T, A> {
            fn drop(&mut self) {
                if self.deleted_cnt > 0 {
                    // SAFETY: Trailing unchecked items must be valid since we never touch them.
                    unsafe {
                        ptr::copy(
                            self.v.as_ptr().add(self.processed_len),
                            self.v.as_mut_ptr().add(self.processed_len - self.deleted_cnt),
                            self.original_len - self.processed_len,
                        );
                    }
                }
                // SAFETY: After filling holes, all items are in contiguous memory.
                unsafe {
                    self.v.set_len(self.original_len - self.deleted_cnt);
                }
            }
        }

        let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len };

        // check_one returns the current element if it was retained.
        #[inline(always)]
        fn check_one<F, T, A: Allocator>(
            f: &mut F,
            g: &mut BackshiftOnDrop<'_, T, A>,
        ) -> Option<*const T>
        where
            F: FnMut(&mut T) -> bool,
        {
            // SAFETY: Unchecked element must be valid.
            let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
            let retain = f(cur);
            // Advance early to avoid double drop if `drop_in_place` panics.
            g.processed_len += 1;
            if retain {
                Some(cur)
            } else {
                g.deleted_cnt += 1;
                // SAFETY: We never touch this element again after dropped.
                unsafe { ptr::drop_in_place(cur) };
                None
            }
        }

        // Stage 1: Nothing was deleted.
        while g.processed_len != original_len {
            if check_one(&mut f, &mut g).is_none() {
                break;
            }
        }

        // Stage 2: Some elements were deleted.
        while g.processed_len != original_len {
            if let Some(cur) = check_one(&mut f, &mut g) {
                // SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
                // We use copy for move, and never touch this element again.
                unsafe {
                    let hole_slot = g.v.as_mut_ptr().add(g.processed_len - 1 - g.deleted_cnt);
                    ptr::copy_nonoverlapping(cur, hole_slot, 1);
                }
            }
        }

        // All item are processed. This can be optimized to `set_len` by LLVM.
        drop(g);
    }

About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK