Created Destructors, 2nd edition (rest)
This commit is contained in:
parent
9dadf6653f
commit
b853d7a4f8
|
@ -0,0 +1,435 @@
|
|||
==================================
|
||||
Nim Destructors and Move Semantics
|
||||
==================================
|
||||
|
||||
:Authors: Andreas Rumpf
|
||||
:Version: |nimversion|
|
||||
|
||||
.. contents::
|
||||
|
||||
|
||||
About this document
|
||||
===================
|
||||
|
||||
This document describes the upcoming Nim runtime which does
|
||||
not use classical GC algorithms anymore but is based on destructors and
|
||||
move semantics. The new runtime's advantages are that Nim programs become
|
||||
oblivious to the involved heap sizes and programs are easier to write to make
|
||||
effective use of multi-core machines. As a nice bonus, files and sockets and
|
||||
the like will not require manual ``close`` calls anymore.
|
||||
|
||||
This document aims to be a precise specification about how
|
||||
move semantics and destructors work in Nim.
|
||||
|
||||
|
||||
Motivating example
|
||||
==================
|
||||
|
||||
With the language mechanisms described here a custom seq could be
|
||||
written as:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
type
|
||||
myseq*[T] = object
|
||||
len, cap: int
|
||||
data: ptr UncheckedArray[T]
|
||||
|
||||
proc `=destroy`*[T](x: var myseq[T]) =
|
||||
if x.data != nil:
|
||||
for i in 0..<x.len: `=destroy`(x[i])
|
||||
dealloc(x.data)
|
||||
x.data = nil
|
||||
|
||||
proc `=`*[T](a: var myseq[T]; b: myseq[T]) =
|
||||
# do nothing for self-assignments:
|
||||
if a.data == b.data: return
|
||||
`=destroy`(a)
|
||||
a.len = b.len
|
||||
a.cap = b.cap
|
||||
if b.data != nil:
|
||||
a.data = cast[type(a.data)](alloc(a.cap * sizeof(T)))
|
||||
for i in 0..<a.len:
|
||||
a.data[i] = b.data[i]
|
||||
|
||||
proc `=move`*[T](a, b: var myseq[T]) =
|
||||
# do nothing for self-assignments:
|
||||
if a.data == b.data: return
|
||||
`=destroy`(a)
|
||||
a.len = b.len
|
||||
a.cap = b.cap
|
||||
a.data = b.data
|
||||
# b's elements have been stolen so ensure that the
|
||||
# destructor for b does nothing:
|
||||
b.data = nil
|
||||
b.len = 0
|
||||
|
||||
proc add*[T](x: var myseq[T]; y: sink T) =
|
||||
if x.len >= x.cap: resize(x)
|
||||
x.data[x.len] = y
|
||||
inc x.len
|
||||
|
||||
proc `[]`*[T](x: myseq[T]; i: Natural): lent T =
|
||||
assert i < x.len
|
||||
x.data[i]
|
||||
|
||||
proc `[]=`*[T](x: myseq[T]; i: Natural; y: sink T) =
|
||||
assert i < x.len
|
||||
x.data[i] = y
|
||||
|
||||
proc createSeq*[T](elems: varargs[T]): myseq[T] =
|
||||
result.cap = elems.len
|
||||
result.len = elems.len
|
||||
result.data = cast[type(result.data)](alloc(result.cap * sizeof(T)))
|
||||
for i in 0..<result.len: result.data[i] = elems[i]
|
||||
|
||||
proc len*[T](x: myseq[T]): int {.inline.} = x.len
|
||||
|
||||
|
||||
|
||||
Lifetime-tracking hooks
|
||||
=======================
|
||||
|
||||
The memory management for Nim's standard ``string`` and ``seq`` types as
|
||||
well as other standard collections is performed via so called
|
||||
"Lifetime-tracking hooks" or "type-bound operators". There are 3 different
|
||||
hooks for each (generic or concrete) object type ``T`` (``T`` can also be a
|
||||
``distinct`` type) that are called implicitly by the compiler.
|
||||
|
||||
(Note: The word "hook" here does not imply any kind of dynamic binding
|
||||
or runtime indirections, the implicit calls are statically bound and
|
||||
potentially inlined.)
|
||||
|
||||
|
||||
`=destroy` hook
|
||||
---------------
|
||||
|
||||
A `=destroy` hook frees the object's associated memory and releases
|
||||
other associated resources. Variables are destroyed via this hook when
|
||||
they go out of scope or when the routine they were declared in is about
|
||||
to return.
|
||||
|
||||
The prototype of this hook for a type ``T`` needs to be:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
proc `=destroy`(x: var T)
|
||||
|
||||
|
||||
The general pattern in ``=destroy`` looks like:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
proc `=destroy`(x: var T) =
|
||||
if x.field != nil:
|
||||
dealloc(x.field)
|
||||
x.field = nil
|
||||
|
||||
|
||||
|
||||
`=move` hook
|
||||
------------
|
||||
|
||||
A `=move` hook moves an object around, the resources are stolen from the source
|
||||
and passed to the destination. It must be ensured that source's destructor does
|
||||
not free the resources afterwards.
|
||||
|
||||
The prototype of this hook for a type ``T`` needs to be:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
proc `=move`(dest, source: var T)
|
||||
|
||||
|
||||
The general pattern in ``=move`` looks like:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
proc `=move`(dest, source: var T) =
|
||||
# protect against self-assignments:
|
||||
if dest.field != source.field:
|
||||
`=destroy`(dest)
|
||||
dest.field = source.field
|
||||
source.field = nil
|
||||
|
||||
|
||||
|
||||
`=` (copy) hook
|
||||
---------------
|
||||
|
||||
The ordinary assignment in Nim conceptually copies the values. The ``=`` hook
|
||||
is called for assignments that couldn't be transformed into moves.
|
||||
|
||||
The prototype of this hook for a type ``T`` needs to be:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
proc `=`(dest: var T; source: T)
|
||||
|
||||
|
||||
The general pattern in ``=`` looks like:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
proc `=`(dest: var T; source: T) =
|
||||
# protect against self-assignments:
|
||||
if dest.field != source.field:
|
||||
`=destroy`(dest)
|
||||
dest.field = copy source.field
|
||||
|
||||
|
||||
The ``=`` proc can be marked with the ``{.error.}`` pragma. Then any assignment
|
||||
that otherwise would lead to a copy is prevented at compile-time.
|
||||
|
||||
|
||||
Move semantics
|
||||
==============
|
||||
|
||||
A "move" can be regarded as an optimized copy operation. If the source of the
|
||||
copy operation is not used afterwards, the copy can be replaced by a move. This
|
||||
document uses the notation ``lastReadOf(x)`` to describe that ``x`` is not
|
||||
used afterwards. This property is computed by a static control flow analysis
|
||||
but can also be enforced by using ``system.move`` explicitly.
|
||||
|
||||
|
||||
Swap
|
||||
====
|
||||
|
||||
The need to check for self-assignments and also the need to destroy previous
|
||||
objects inside ``=`` and ``=move`` is a strong indicator to treat ``system.swap``
|
||||
as a builtin primitive of its own that simply swaps every field in the involved
|
||||
objects via ``copyMem`` or a comparable mechanism.
|
||||
In other words, ``swap(a, b)`` is **not** implemented
|
||||
as ``let tmp = move(a); b = move(a); a = move(tmp)``!
|
||||
|
||||
This has further consequences:
|
||||
|
||||
* Objects that contain pointers that point to the same object are not supported
|
||||
by Nim's model. Objects can be swapped and end up in an inconsistent state.
|
||||
* Seqs can use ``realloc`` in the implementation.
|
||||
|
||||
|
||||
Sink parameters
|
||||
===============
|
||||
|
||||
To move a variable into a collection usually ``sink`` parameters are involved.
|
||||
A location that is passed to a ``sink`` parameters should not be used afterwards.
|
||||
This is ensured by a static analysis over a control flow graph. A sink parameter
|
||||
*may* be consumed once in the proc's body but doesn't have to be consumed at all.
|
||||
The reason for this is that signatures
|
||||
like ``proc put(t: var Table; k: sink Key, v: sink Value)`` should be possible
|
||||
without any further overloads and ``put`` might not take owership of ``k`` if
|
||||
``k`` already exists in the table. Sink parameters enable an affine type system,
|
||||
not a linear type system.
|
||||
|
||||
The employed static analysis is limited and only concerned with local variables;
|
||||
however object and tuple fields are treated as separate entities:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
proc consume(x: sink Obj) = discard "no implementation"
|
||||
|
||||
proc main =
|
||||
let tup = (Obj(), Obj())
|
||||
consume tup[0]
|
||||
# ok, only tup[0] was consumed, tup[1] is still alive:
|
||||
echo tup[1]
|
||||
|
||||
|
||||
Sometimes it is required to explicitly ``move`` a value into its final position:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
proc main =
|
||||
var dest, src: array[10, string]
|
||||
# ...
|
||||
for i in 0..high(dest): dest[i] = move(src[i])
|
||||
|
||||
An implementation is allowed, but not required to implement even more move
|
||||
optimizations (and the current implementation does not).
|
||||
|
||||
|
||||
Self assignments
|
||||
================
|
||||
|
||||
Unfortunately this document departs significantly from
|
||||
the older design as specified here, https://github.com/nim-lang/Nim/wiki/Destructors.
|
||||
The reason is that under the old design so called "self assignments" could not work.
|
||||
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
proc select(cond: bool; a, b: sink string): string =
|
||||
if cond:
|
||||
result = a # moves a into result
|
||||
else:
|
||||
result = b # moves b into result
|
||||
|
||||
proc main =
|
||||
var x = "abc"
|
||||
var y = "xyz"
|
||||
|
||||
# possible self-assignment:
|
||||
x = select(rand() < 0.5, x, y)
|
||||
# 'select' must communicate what parameter has been
|
||||
# consumed. We cannot simply generate:
|
||||
# (select(...); wasMoved(x); wasMoved(y))
|
||||
|
||||
Consequence: ``sink`` parameters for objects that have a non-trivial destructor
|
||||
must be passed as by-pointer under the hood. A further advantage is that parameters
|
||||
are never destroyed, only variables are. The caller's location passed to
|
||||
a ``sink`` parameter has to be destroyed by the caller and does not burden
|
||||
the callee.
|
||||
|
||||
|
||||
Const temporaries
|
||||
=================
|
||||
|
||||
Constant literals like ``nil`` cannot be easily be ``=moved``'d. The solution
|
||||
is to pass a temporary location that contains ``nil`` to the sink location.
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
var x: owned ref T = nil
|
||||
# gets turned into:
|
||||
var tmp = nil
|
||||
move(x, tmp)
|
||||
|
||||
|
||||
Rewrite rules
|
||||
=============
|
||||
|
||||
**Note**: A function call ``f()`` is always the "last read" of the involved
|
||||
temporary location and so covered under the more general rewrite rules.
|
||||
|
||||
**Note**: There are two different allowed implementation strategies:
|
||||
|
||||
1. The produced ``finally`` section can be a single section that is wrapped
|
||||
around the complete routine body.
|
||||
2. The produced ``finally`` section is wrapped around the enclosing scope.
|
||||
|
||||
The current implementation follows strategy (1). This means that resources are
|
||||
not destroyed at the scope exit, but at the proc exit.
|
||||
|
||||
::
|
||||
|
||||
var x: T; stmts
|
||||
--------------- (var)
|
||||
var x: T; try stmts
|
||||
finally: `=destroy`(x)
|
||||
|
||||
|
||||
f(...)
|
||||
------------------------ (function-call)
|
||||
(let tmp = f(...); tmp)
|
||||
finally: `=destroy`(tmp)
|
||||
|
||||
|
||||
x = lastReadOf z
|
||||
------------------ (move-optimization)
|
||||
`=move`(x, z)
|
||||
|
||||
|
||||
x = y
|
||||
------------------ (copy)
|
||||
`=`(x, y)
|
||||
|
||||
|
||||
x = move y
|
||||
------------------ (enforced-move)
|
||||
`=move`(x, y)
|
||||
|
||||
|
||||
f_sink(notLastReadOf y)
|
||||
----------------------- (copy-to-sink)
|
||||
(let tmp; `=`(tmp, y); f_sink(tmp))
|
||||
finally: `=destroy`(tmp)
|
||||
|
||||
|
||||
f_sink(move y)
|
||||
----------------------- (enforced-move-to-sink)
|
||||
(let tmp; `=move`(tmp, y); f_sink(tmp))
|
||||
finally: `=destroy`(tmp)
|
||||
|
||||
|
||||
Cursor variables
|
||||
================
|
||||
|
||||
There is an additional rewrite rule for so called "cursor" variables.
|
||||
A cursor variable is a variable that is only used for navigation inside
|
||||
a data structure. The otherwise implied copies (or moves) and destructions
|
||||
can be avoided altogether for cursor variables:
|
||||
|
||||
::
|
||||
|
||||
var x {.cursor.}: T
|
||||
x = path(z)
|
||||
stmts
|
||||
-------------------------- (cursor-var)
|
||||
x = bitwiseCopy(path z)
|
||||
stmts
|
||||
# x is not destroyed.
|
||||
|
||||
|
||||
``stmts`` must not mutate ``z`` nor ``x``. All assignments to ``x`` must be
|
||||
of the form ``path(z)`` but the ``z`` can differ. Neither ``z`` nor ``x``
|
||||
can be aliased; this implies the addresses of these locations must not be
|
||||
used explicitly.
|
||||
|
||||
The current implementation does not compute cursor variables but supports
|
||||
the ``.cursor`` pragma annotation. Cursor variables are respected and
|
||||
simply trusted: No checking is performed that no mutations or aliasing
|
||||
occurs.
|
||||
|
||||
Cursor variables are commonly used in ``iterator`` implementations:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
iterator nonEmptyItems(x: seq[string]): string =
|
||||
for i in 0..high(x):
|
||||
let it {.cursor.} = x[i] # no string copies, no destruction of 'it'
|
||||
if it.len > 0:
|
||||
yield it
|
||||
|
||||
|
||||
Owned refs
|
||||
==========
|
||||
|
||||
Let ``W`` be an ``owned ref`` type. Conceptually its hooks look like:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
proc `=destroy`(x: var W) =
|
||||
if x != nil:
|
||||
assert x.refcount == 0, "dangling unowned pointers exist!"
|
||||
`=destroy`(x[])
|
||||
x = nil
|
||||
|
||||
proc `=`(x: var W; y: W) {.error: "owned refs can only be moved".}
|
||||
|
||||
proc `=move`(x, y: var W) =
|
||||
if x != y:
|
||||
`=destroy`(x)
|
||||
bitwiseCopy x, y # raw pointer copy
|
||||
y = nil
|
||||
|
||||
Let ``U`` be an unowned ``ref`` type. Conceptually its hooks look like:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
proc `=destroy`(x: var U) =
|
||||
if x != nil:
|
||||
dec x.refcount
|
||||
|
||||
proc `=`(x: var U; y: U) =
|
||||
# Note: No need to check for self-assignments here.
|
||||
if y != nil: inc y.refcount
|
||||
if x != nil: dec x.refcount
|
||||
bitwiseCopy x, y # raw pointer copy
|
||||
|
||||
proc `=move`(x, y: var U) =
|
||||
# Note: Moves are the same as assignments.
|
||||
`=`(x, y)
|
||||
|
Loading…
Reference in New Issue