Skip to content

dicomtrolley.caching

Storing DICOM query results locally to avoid unneeded calls to server

CachedSearcher

Bases: Searcher

A cache wrapped around a Searcher instance. Serves search responses from cache first. Calls searcher if needed.

Caches two types of searcher method calls. find_studies(Query): caches each returned DICOM object individually and then associates these objects with the incoming query. A cached response is returned only if the query matches exactly. The association is by DICOM tree address (study/series/instance), not by DICOM object identity. This means that an underlying DICOM object can be updated without invalidating the cached response. The cached response to a query will be returned as long as there are non-expired cached objects at each associated tree address.

find_study_by_id(study_uid, query_level): Retrieves the study with the given study_uid from cache and checks whether it has children up to the required query level (depth).

Source code in dicomtrolley/caching.py
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
class CachedSearcher(Searcher):
    """A cache wrapped around a Searcher instance. Serves search responses from
    cache first. Calls searcher if needed.

    Caches two types of searcher method calls:
    find_studies(query):
        Caches each returned DICOM object individually and then associates these
        with the incoming query. Only if the query matches exactly is a cached
        response returned.
        Associates DICOM tree address (study/series/instance). Not DICOM object
        identities. This means that an underlying DICOM object can be updated without
        invalidating the cache response. The cached response to a query will be
        returned as long as there are non-expired cached objects at each associated
        tree address

    find_study_by_id(study_uid, query_level):
        Retrieves study_uid from cache and checks whether it has children up to
        the required level (depth).

    """

    def __init__(self, searcher: Searcher, cache: DICOMObjectCache):
        """
        Parameters
        ----------
        searcher
            The searcher that is called whenever the cache cannot answer
        cache
            Object cache used directly by find_study_by_id and, wrapped in a
            QueryCache, by find_studies
        """
        self.searcher = searcher
        self.cache = cache
        self.query_cache = QueryCache(cache=cache)

    def __str__(self):
        return f"CachedSearcher for {self.searcher}"

    def find_studies(self, query: Query) -> Sequence[Study]:
        """Try to return from cache, otherwise call searcher.

        On a cache miss the searcher's response is stored in the query cache
        before being returned.
        """
        try:
            return self.query_cache.get_response(query)
        except NodeNotFound:
            logger.debug(
                f"No cache for {query.to_short_string()}. "
                f"Performing query with {self.searcher}"
            )
            response = self.searcher.find_studies(query)
            self.query_cache.add_response(query, response)
            return response

    def find_study_by_id(
        self, study_uid: str, query_level: QueryLevels = QueryLevels.STUDY
    ) -> Study:
        """Find a single study at the given depth

        Parameters
        ----------
        study_uid
            UID of the study to find
        query_level
            Level that the returned study should contain objects for

        Returns
        -------
        Study
            From cache if it is cached at sufficient depth, otherwise the
            result of a fresh call to the wrapped searcher (which is then cached)
        """
        try:
            from_cache: Study = self.cache.retrieve(
                StudyReference(study_uid=study_uid)
            )
            # NOTE(review): this treats a larger max_object_depth() value as
            # 'not deep enough'. Whether that is right depends on the ordering
            # of DICOMObjectLevels values - confirm against its definition.
            if (
                from_cache.max_object_depth()
                > DICOMObjectLevels.from_query_level(query_level)
            ):
                # Raised inside the try on purpose: handled below exactly
                # like a plain cache miss
                raise NodeNotFound(
                    f"{from_cache} found in cache, but did not contain "
                    f"objects up to '{query_level}' level"
                )
            return from_cache
        except NodeNotFound as e:
            logger.debug(
                f"Could not find study in cache ({e}). Launching query to find "
                f"additional info"
            )
            study = self.searcher.find_study_by_id(
                study_uid, query_level=query_level
            )
            self.cache.add(study)
            return study

find_studies(query)

Try to return from cache, otherwise call searcher.

Source code in dicomtrolley/caching.py
260
261
262
263
264
265
266
267
268
269
270
271
def find_studies(self, query: Query) -> Sequence[Study]:
    """Try to return from cache, otherwise call searcher.

    On a cache miss the searcher's response is stored in the query cache
    before being returned.
    """
    try:
        return self.query_cache.get_response(query)
    except NodeNotFound:
        # Trailing space keeps the two message fragments from running together
        logger.debug(
            f"No cache for {query.to_short_string()}. "
            f"Performing query with {self.searcher}"
        )
        response = self.searcher.find_studies(query)
        self.query_cache.add_response(query, response)
        return response

find_study_by_id(study_uid, query_level=QueryLevels.STUDY)

Find a single study at the given depth

Source code in dicomtrolley/caching.py
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
def find_study_by_id(
    self, study_uid: str, query_level: QueryLevels = QueryLevels.STUDY
) -> Study:
    """Find a single study at the given depth

    Parameters
    ----------
    study_uid
        UID of the study to find
    query_level
        Level that the returned study should contain objects for

    Returns
    -------
    Study
        From cache if it is cached at sufficient depth, otherwise the
        result of a fresh call to the wrapped searcher (which is then cached)
    """
    try:
        from_cache: Study = self.cache.retrieve(
            StudyReference(study_uid=study_uid)
        )
        # NOTE(review): this treats a larger max_object_depth() value as
        # 'not deep enough'. Whether that is right depends on the ordering
        # of DICOMObjectLevels values - confirm against its definition.
        if (
            from_cache.max_object_depth()
            > DICOMObjectLevels.from_query_level(query_level)
        ):
            # Raised inside the try on purpose: handled below exactly like
            # a plain cache miss
            raise NodeNotFound(
                f"{from_cache} found in cache, but did not contain "
                f"objects up to '{query_level}' level"
            )
        return from_cache
    except NodeNotFound as e:
        logger.debug(
            f"Could not find study in cache ({e}). Launching query to find "
            f"additional info"
        )
        study = self.searcher.find_study_by_id(
            study_uid, query_level=query_level
        )
        self.cache.add(study)
        return study

DICOMObjectCache

Source code in dicomtrolley/caching.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
class DICOMObjectCache:
    """Tree of DICOM objects addressed by study/series/instance UIDs, with
    optional expiry of the stored objects.
    """

    def __init__(
        self,
        initial_objects: Optional[List[DICOMObject]] = None,
        expiry_seconds: Optional[int] = 600,
    ):
        """A tree holding expiring DICOM objects. Objects can be retrieved by
        passing a DICOM object reference (study, series or instance reference):

        >>> cache = DICOMObjectCache()
        >>> cache.add(a_study)  # a Study with full series and instance info
        <Study>
        >>> cache.retrieve(a_series.reference())  # a_series is part of a_study
        <Series>
        >>> cache.retrieve(an_instance.reference())
        <Instance>
        # 20 minutes later... study has expired
        >>> cache.retrieve(a_series.reference())  # raises NodeNotFound

        Parameters
        ----------
        initial_objects, optional
            Shorthand for add() on all objects in this list. Defaults to empty

        expiry_seconds, optional
            Expire objects after this many seconds. If set to None, will disable
            expiry. Defaults to 600 (10 minutes)

        Raises
        ------
        ValueError
            If initial_objects is non-empty but not a list

        Notes
        -----
        The functionality here is similar to dicomtrolley.parsing.TreeNode, but the
        use case is different enough to warrant a separate class I think

        """
        if expiry_seconds is None:
            self.expiry = None
        else:
            self.expiry = ExpiringCollection(
                expire_after_seconds=expiry_seconds
            )
        self.root = TreeNode()
        # Expired addresses that could not be pruned yet (they were not leaves)
        self._awaiting_prune: List[TreeAddress] = []
        if initial_objects:  # not none or empty:
            if not isinstance(initial_objects, list):
                raise ValueError(
                    f"Expected list but got {initial_objects}. Did you "
                    f"forget [braces] for initial_objects value?"
                )
            for x in initial_objects:
                self.add(x)

    def add_all(self, objects: Iterable[DICOMObject]):
        """Add each object to cache, pruning expired entries only once up front

        Raises
        ------
        ValueError
            If a single DICOMObject is passed instead of an iterable
        """
        if isinstance(objects, DICOMObject):
            # This mistake is common (for me) and causes unreadable errors. Avoid.
            raise ValueError(
                "parameter 'objects' should be an iterable of "
                "DICOMObjects, not a single object"
            )
        self.prune_expired()
        for obj in objects:
            self.add(obj, prune=False)

    def add(self, obj: DICOMObject, prune=True):
        """Add this object, and recursively all its children, to cache

        Parameters
        ----------
        obj
            The object to cache. Stored at the address derived from
            obj.reference()
        prune
            If True, remove expired entries before adding. Defaults to True

        Returns
        -------
        The input DICOMObject. Just so you can add and return a value in a single
        line in calling code. Like new_dicom = cache.add(get_new_dicom())
        """
        logger.debug(f"Adding to cache: {obj}")
        if prune:
            self.prune_expired()
        address = self.to_address(obj.reference())
        self.root.add(obj, address=address)
        # Explicit None check: expiry is None only when expiry is disabled, and
        # the check should not depend on the truthiness of the collection
        if self.expiry is not None:
            self.expiry.add(address)

        for child in obj.children() or ():
            self.add(child, prune=False)  # avoid too many calls to prune_expired

        return obj

    def retrieve(self, reference: DICOMObjectReference):
        """Try to retrieve object from cache

        Parameters
        ----------
        reference
            The dicom object you would like to retrieve

        Raises
        ------
        NodeNotFound
            If the object does not exist in cache or has expired

        Returns
        -------
        DICOMObject
            The cached object
        """
        self.prune_expired()
        try:
            data = self.root.get_node(
                self.to_address(reference), create=False
            ).data
            if data:
                return data
            else:
                # A node without data: emptied by prune_expired, or only
                # present as parent of other nodes
                raise NodeNotFound(
                    f"Node found in cache, but no data for reference "
                    f"{reference}"
                )
        except KeyError as e:
            raise NodeNotFound(
                f"No node found in cache for reference {reference}"
            ) from e

    def prune_expired(self):
        """Remove all expired nodes

        Expired addresses that are not leaves cannot be removed yet. Their data
        is cleared and they are retried on the next call.
        """
        if self.expiry is None:
            logger.debug("prune: not pruning as self.expiry = False")
            return  # don't do anything
        expired = self.expiry.collect_expired()
        # Sorted by address length and popped from the end, so the longest
        # (deepest) addresses are attempted before their parents
        self._awaiting_prune = sorted(self._awaiting_prune + expired, key=len)
        prune_later = []
        pruned = []
        while self._awaiting_prune:
            address = self._awaiting_prune.pop()  # work from last
            try:
                self.root.prune_leaf(address)
                pruned.append(address)
            except ValueError:
                #  was not a leaf. Make empty and save for later
                self.root.get_node(address).data = None
                prune_later.append(address)
        if pruned:
            msg = f"prune: Pruned away {len(pruned)} leaves: ({pruned})"
            if prune_later:
                msg += f" could not prune {len(prune_later)}. Leaving those for later"
            logger.debug(msg)

        self._awaiting_prune = prune_later

    @staticmethod
    def to_address(ref: DICOMObjectReference) -> TreeAddress:
        """Convert reference to address that can be used in TreeNode

        Raises
        ------
        ValueError
            If ref is not a study, series or instance reference
        """
        if isinstance(ref, StudyReference):
            return (ref.study_uid,)
        elif isinstance(ref, SeriesReference):
            return ref.study_uid, ref.series_uid
        elif isinstance(ref, InstanceReference):
            return ref.study_uid, ref.series_uid, ref.instance_uid
        else:
            raise ValueError(f"Expected DICOM object reference, but got {ref}")

__init__(initial_objects=None, expiry_seconds=600)

A tree holding expiring DICOM objects. Objects can be retrieved by study/series/instance UID tuples:

>>> cache = DICOMObjectCache()
>>> cache.add(a_study)  # a Study with full series and instance info
>>> cache.retrieve(a_series.reference())  # returns the cached Series
>>> cache.retrieve(an_instance.reference())  # returns the cached Instance

20 minutes later... study has expired

>>> cache.retrieve(a_series.reference())  # raises NodeNotFound

Parameters

initial_objects, optional Shorthand for add() on all objects in this list. Defaults to empty

expiry_seconds, optional Expire objects after this many seconds. If set to None, will disable expiry. Defaults to 600 (10 minutes)

Notes

The functionality here is similar to dicomtrolley.parsing.TreeNode, but the use case is different enough to warrant a separate class I think

Source code in dicomtrolley/caching.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def __init__(
    self,
    initial_objects: Optional[List[DICOMObject]] = None,
    expiry_seconds: Optional[int] = 600,
):
    """A tree holding expiring DICOM objects. Objects can be retrieved by
    passing a DICOM object reference (study, series or instance reference):

    >>> cache = DICOMObjectCache()
    >>> cache.add(a_study)  # a Study with full series and instance info
    >>> cache.retrieve(a_series.reference())  # a_series is part of a_study
    <Series>
    >>> cache.retrieve(an_instance.reference())
    <Instance>
    # 20 minutes later... study has expired
    >>> cache.retrieve(a_series.reference())  # raises NodeNotFound

    Parameters
    ----------
    initial_objects, optional
        Shorthand for add() on all objects in this list. Defaults to empty

    expiry_seconds, optional
        Expire objects after this many seconds. If set to None, will disable
        expiry. Defaults to 600 (10 minutes)

    Raises
    ------
    ValueError
        If initial_objects is non-empty but not a list

    Notes
    -----
    The functionality here is similar to dicomtrolley.parsing.TreeNode, but the
    use case is different enough to warrant a separate class I think

    """
    if expiry_seconds is None:
        self.expiry = None
    else:
        self.expiry = ExpiringCollection(
            expire_after_seconds=expiry_seconds
        )
    self.root = TreeNode()
    # Expired addresses that could not be pruned yet (they were not leaves)
    self._awaiting_prune: List[TreeAddress] = []
    if initial_objects:  # not none or empty:
        if not isinstance(initial_objects, list):
            # Trailing space keeps "Did you" and "forget" from running together
            raise ValueError(
                f"Expected list but got {initial_objects}. Did you "
                f"forget [braces] for initial_objects value?"
            )
        for x in initial_objects:
            self.add(x)

add(obj, prune=True)

Add this object to cache

Returns

The input DICOMObject. Just so you can add and return a value in a single line in calling code. Like new_dicom = cache.add(get_new_dicom())

Source code in dicomtrolley/caching.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def add(self, obj: DICOMObject, prune=True):
    """Add this object, and recursively all its children, to cache

    Parameters
    ----------
    obj
        The object to cache. Stored at the address derived from
        obj.reference()
    prune
        If True, remove expired entries before adding. Defaults to True

    Returns
    -------
    The input DICOMObject. Just so you can add and return a value in a single
    line in calling code. Like new_dicom = cache.add(get_new_dicom())
    """
    logger.debug(f"Adding to cache: {obj}")
    if prune:
        self.prune_expired()
    address = self.to_address(obj.reference())
    self.root.add(obj, address=address)
    # Explicit None check: expiry is None only when expiry is disabled, and
    # the check should not depend on the truthiness of the collection
    if self.expiry is not None:
        self.expiry.add(address)

    for child in obj.children() or ():
        self.add(child, prune=False)  # avoid too many calls to prune_expired

    # The docstring has always promised this return value
    return obj

prune_expired()

Remove all expired nodes

Source code in dicomtrolley/caching.py
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
def prune_expired(self):
    """Remove all expired nodes

    Expired addresses that are not leaves cannot be removed yet. Their data
    is cleared and they are retried on the next call.
    """
    if self.expiry is None:
        logger.debug("prune: not pruning as self.expiry = False")
        return  # don't do anything
    expired = self.expiry.collect_expired()
    # Sorted by address length and popped from the end, so the longest
    # (deepest) addresses are attempted before their parents
    self._awaiting_prune = sorted(self._awaiting_prune + expired, key=len)
    prune_later = []
    pruned = []
    while self._awaiting_prune:
        address = self._awaiting_prune.pop()  # work from last
        try:
            self.root.prune_leaf(address)
            pruned.append(address)
        except ValueError:
            #  was not a leaf. Make empty and save for later
            self.root.get_node(address).data = None
            prune_later.append(address)
    if pruned:
        msg = f"prune: Pruned away {len(pruned)} leaves: ({pruned})"
        if prune_later:
            # Leading space separates this from the list of pruned addresses
            msg += f" could not prune {len(prune_later)}. Leaving those for later"
        logger.debug(msg)

    self._awaiting_prune = prune_later

retrieve(reference)

Try to retrieve object from cache

Parameters

reference The dicom object you would like to retrieve

Raises

NodeNotFound If the object does not exist in cache or has expired

Returns

DICOMObject The cached object

Source code in dicomtrolley/caching.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
def retrieve(self, reference: DICOMObjectReference):
    """Look up a cached DICOM object by its reference

    Expired entries are pruned before the lookup takes place.

    Parameters
    ----------
    reference
        Reference to the DICOM object that should be fetched from cache

    Raises
    ------
    NodeNotFound
        If there is no node for this reference, or the node holds no data

    Returns
    -------
    DICOMObject
        The object stored in cache for this reference
    """
    self.prune_expired()
    try:
        node = self.root.get_node(self.to_address(reference), create=False)
        cached = node.data
        if not cached:
            # Node exists in the tree but carries no (more) data
            raise NodeNotFound(
                f"Node found in cache, but no data for reference "
                f"{reference}"
            )
        return cached
    except KeyError as e:
        # A KeyError during lookup is reported as a cache miss
        raise NodeNotFound(
            f"No node found in cache for reference {reference}"
        ) from e

to_address(ref) staticmethod

Convert reference to address that can be used in TreeNode

Source code in dicomtrolley/caching.py
174
175
176
177
178
179
180
181
182
183
184
@staticmethod
def to_address(ref: DICOMObjectReference) -> TreeAddress:
    """Translate a DICOM object reference into a TreeNode address tuple

    A study reference yields a 1-tuple, a series reference a 2-tuple and an
    instance reference a 3-tuple of UIDs.

    Raises
    ------
    ValueError
        If ref is none of the three supported reference types
    """
    if isinstance(ref, StudyReference):
        return (ref.study_uid,)
    if isinstance(ref, SeriesReference):
        return (ref.study_uid, ref.series_uid)
    if isinstance(ref, InstanceReference):
        return (ref.study_uid, ref.series_uid, ref.instance_uid)
    raise ValueError(f"Expected DICOM object reference, but got {ref}")

QueryCache

Caches the response to DICOM queries

Source code in dicomtrolley/caching.py
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
class QueryCache:
    """Remembers which DICOM objects were returned for which query

    The objects themselves live in a DICOMObjectCache. This class only keeps,
    per serialized query, the references of the objects in the response.
    """

    def __init__(self, cache: DICOMObjectCache):
        # Maps query.model_dump_json() to the references in its response
        self.queries: Dict[str, Tuple[DICOMObjectReference, ...]] = {}
        self.cache = cache

    def add_response(self, query: Query, response: Sequence[DICOMObject]):
        """Store the response objects and link their references to query"""
        self.cache.add_all(response)
        response_refs = tuple(item.reference() for item in response)
        query_key = query.model_dump_json()
        self.queries[query_key] = response_refs

    def get_response(self, query: Query) -> List[Study]:
        """Return the cached objects that were the response to this query

        Raises
        ------
        NodeNotFound
            If the query is unknown, or if any object in its response can no
            longer be retrieved from cache. In the latter case the query is
            forgotten as well.
        """
        try:
            cached_refs = self.queries[query.model_dump_json()]
        except KeyError as e:
            raise NodeNotFound(
                f"Query {query.to_short_string()} not found in cache"
            ) from e

        try:
            objects = [self.cache.retrieve(ref) for ref in cached_refs]
            logger.debug(
                f"Found all ({len(objects)}) objects in cache for "
                f"{query.to_short_string()}. Returning."
            )
            return objects
        except NodeNotFound as e:
            # At least one object is gone, so this response is stale. Drop it
            self.queries.pop(query.model_dump_json())
            raise NodeNotFound(
                f"One or more response to {query.to_short_string()} "
                f"was not in cache"
            ) from e

add_response(query, response)

Cache response for this query

Source code in dicomtrolley/caching.py
194
195
196
197
198
def add_response(self, query: Query, response: Sequence[DICOMObject]):
    """Store the response objects and link their references to query"""
    self.cache.add_all(response)
    response_refs = tuple(item.reference() for item in response)
    # The serialized query is the lookup key for this response
    query_key = query.model_dump_json()
    self.queries[query_key] = response_refs

get_response(query)

Obtain cached response for this query

Raises

NodeNotFound If any of the results of query are not in cache or have expired

Source code in dicomtrolley/caching.py
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
def get_response(self, query: Query) -> List[Study]:
    """Return the cached objects that were the response to this query

    Raises
    ------
    NodeNotFound
        If the query is unknown, or if any object in its response can no
        longer be retrieved from cache. In the latter case the query is
        forgotten as well.
    """
    try:
        cached_refs = self.queries[query.model_dump_json()]
    except KeyError as e:
        raise NodeNotFound(
            f"Query {query.to_short_string()} not found in cache"
        ) from e

    try:
        objects = [self.cache.retrieve(ref) for ref in cached_refs]
        logger.debug(
            f"Found all ({len(objects)}) objects in cache for "
            f"{query.to_short_string()}. Returning."
        )
        return objects
    except NodeNotFound as e:
        # At least one object is gone, so this response is stale. Drop it
        self.queries.pop(query.model_dump_json())
        raise NodeNotFound(
            f"One or more response to {query.to_short_string()} "
            f"was not in cache"
        ) from e