Video Annotation

class supervisely_lib.video_annotation.video_annotation.VideoAnnotation(img_size: Tuple[int, int], frames_count: int, objects: Optional[supervisely_lib.video_annotation.video_object_collection.VideoObjectCollection] = None, frames: Optional[supervisely_lib.video_annotation.frame_collection.FrameCollection] = None, tags: Optional[supervisely_lib.video_annotation.video_tag_collection.VideoTagCollection] = None, description: str = '', key: Optional[uuid.UUID] = None)[source]

Bases: object

VideoAnnotation for a single video. VideoAnnotation object is immutable.

Parameters
  • img_size (Tuple[int, int] or List[int, int]) – Size of the image (height, width).

  • frames_count (int) – Number of frames in VideoAnnotation.

  • objects (VideoObjectCollection, optional) – VideoObjectCollection object.

  • frames (FrameCollection, optional) – FrameCollection object.

  • tags (VideoTagCollection, optional) – VideoTagCollection object.

  • description (str, optional) – Video description.

  • key (UUID, optional) – UUID object.

Raises

TypeError, if img_size is not a tuple or a list

Usage example
# Simple VideoAnnotation example
height, width = 500, 700
frames_count = 10
video_ann = sly.VideoAnnotation((height, width), frames_count)
print(video_ann.to_json())
# Output: {
#     "size": {
#         "height": 500,
#         "width": 700
#     },
#     "description": "",
#     "key": "abef780b01ad4063b4b961ab2ba2f410",
#     "tags": [],
#     "objects": [],
#     "frames": [],
#     "framesCount": 10
# }

# More complex VideoAnnotation example

height, width = 500, 700
frames_count = 1
# VideoObjectCollection
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
# FrameCollection
fr_index = 7
geometry = sly.Rectangle(0, 0, 100, 100)
video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
frame = sly.Frame(fr_index, figures=[video_figure_car])
frames = sly.FrameCollection([frame])
# VideoTagCollection
meta_car = sly.TagMeta('car_tag', sly.TagValueType.ANY_STRING)
from supervisely_lib.video_annotation.video_tag import VideoTag
vid_tag = VideoTag(meta_car, value='acura')
from supervisely_lib.video_annotation.video_tag_collection import VideoTagCollection
video_tags = VideoTagCollection([vid_tag])
# Description
descr = 'car example'

video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames, video_tags, descr)
print(video_ann.to_json())
# Output: {
#     "size": {
#         "height": 500,
#         "width": 700
#     },
#     "description": "car example",
#     "key": "a85b282e5e174e7ebad6f878b6919244",
#     "tags": [
#         {
#             "name": "car_tag",
#             "value": "acura",
#             "key": "540a8212b0344788953996cea220ea8b"
#         }
#     ],
#     "objects": [
#         {
#             "key": "7c74b8a495044ea0ac127f32751c8f5c",
#             "classTitle": "car",
#             "tags": []
#         }
#     ],
#     "frames": [
#         {
#             "index": 7,
#             "figures": [
#                 {
#                     "key": "82dcbf2e3c5f42a99eeea2ad34173793",
#                     "objectKey": "7c74b8a495044ea0ac127f32751c8f5c",
#                     "geometryType": "rectangle",
#                     "geometry": {
#                         "points": {
#                             "exterior": [
#                                 [
#                                     0,
#                                     0
#                                 ],
#                                 [
#                                     100,
#                                     100
#                                 ]
#                             ],
#                             "interior": []
#                         }
#                     }
#                 }
#             ]
#         }
#     ],
#     "framesCount": 1
# }
property img_size

Size of the image (height, width).

Returns

Image size

Return type

Tuple[int, int]

Usage example
height, width = 500, 700
frames_count = 1
video_ann = sly.VideoAnnotation((height, width), frames_count)
print(video_ann.img_size)
# Output: (500, 700)
property frames_count

Number of frames.

Returns

Frames count

Return type

int

Usage example
height, width = 500, 700
frames_count = 15
video_ann = sly.VideoAnnotation((height, width), frames_count)
print(video_ann.frames_count)
# Output: 15
property objects

VideoAnnotation objects.

Returns

VideoObjectCollection object

Return type

VideoObjectCollection

Usage example
height, width = 500, 700
frames_count = 1
# VideoObjectCollection
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
video_ann = sly.VideoAnnotation((height, width), frames_count, objects)
print(video_ann.objects.to_json())
# Output: [
#     {
#         "key": "79fc07a4a6ca4b2796279bc033b9ec9a",
#         "classTitle": "car",
#         "tags": []
#     }
# ]
property frames

VideoAnnotation frames.

Returns

FrameCollection object

Return type

FrameCollection

Usage example
height, width = 500, 700
frames_count = 1
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
fr_index = 7
geometry = sly.Rectangle(0, 0, 100, 100)
video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
frame = sly.Frame(fr_index, figures=[video_figure_car])
frames = sly.FrameCollection([frame])

video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames)
print(video_ann.frames.to_json())
# Output: [
#     {
#         "index": 7,
#         "figures": [
#             {
#                 "key": "2842f561b1924f6abd6ab6f696ed9b65",
#                 "objectKey": "7f30fa9b78444ad69e02b37edbf9a902",
#                 "geometryType": "rectangle",
#                 "geometry": {
#                     "points": {
#                         "exterior": [
#                             [
#                                 0,
#                                 0
#                             ],
#                             [
#                                 100,
#                                 100
#                             ]
#                         ],
#                         "interior": []
#                     }
#                 }
#             }
#         ]
#     }
# ]
property figures

VideoAnnotation figures.

Returns

List of VideoFigures from all frames in VideoAnnotation

Return type

List[VideoFigure]

Usage example
height, width = 500, 700
frames_count = 1
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
fr_index = 7
geometry = sly.Rectangle(0, 0, 100, 100)
video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
frame = sly.Frame(fr_index, figures=[video_figure_car])
frames = sly.FrameCollection([frame])

video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames)
print(len(video_ann.figures)) # 1
property tags

VideoAnnotation tags.

Returns

VideoTagCollection object

Return type

VideoTagCollection

Usage example
height, width = 500, 700
frames_count = 1
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
fr_index = 7
geometry = sly.Rectangle(0, 0, 100, 100)
video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
frame = sly.Frame(fr_index, figures=[video_figure_car])
frames = sly.FrameCollection([frame])
meta_car = sly.TagMeta('car_tag', sly.TagValueType.ANY_STRING)
from supervisely_lib.video_annotation.video_tag import VideoTag
vid_tag = VideoTag(meta_car, value='acura')
from supervisely_lib.video_annotation.video_tag_collection import VideoTagCollection
tags = VideoTagCollection([vid_tag])

video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames, tags)
print(video_ann.tags.to_json())
# Output: [
#     {
#         "name": "car_tag",
#         "value": "acura",
#         "key": "c63e8259589a4fa5b4fb15a48c1f6a63"
#     }
# ]
key()[source]
property description

Video description.

Returns

Video description

Return type

str

Usage example
height, width = 500, 700
frames_count = 1
descr = 'example'
video_ann = sly.VideoAnnotation((height, width), frames_count, description=descr)
print(video_ann.description) # example
validate_figures_bounds() → None[source]

Checks that the figures from all frames in the collection are within the image bounds.

Raises

OutOfImageBoundsExtension, if figure is out of image bounds

Returns

None

Return type

NoneType

Usage example
height, width = 50, 700
frames_count = 1
obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
objects = sly.VideoObjectCollection([video_obj_car])
fr_index = 7
geometry = sly.Rectangle(0, 0, 100, 100)
video_figure_car = sly.VideoFigure(video_obj_car, geometry, fr_index)
frame = sly.Frame(fr_index, figures=[video_figure_car])
frames = sly.FrameCollection([frame])

video_ann = sly.VideoAnnotation((height, width), frames_count, objects, frames)
video_ann.validate_figures_bounds()
# raise OutOfImageBoundsExtension("Figure is out of image bounds")
to_json(key_id_map: Optional[supervisely_lib.video_annotation.key_id_map.KeyIdMap] = None) → dict[source]

Convert the VideoAnnotation to a json dict. Read more about Supervisely format.

Parameters

key_id_map (KeyIdMap, optional) – KeyIdMap object.

Returns

Json format as a dict

Return type

dict

Usage example
height, width = 500, 700
frames_count = 10
video_ann = sly.VideoAnnotation((height, width), frames_count)
print(video_ann.to_json())
# Output: {
#     "size": {
#         "height": 500,
#         "width": 700
#     },
#     "description": "",
#     "key": "abef780b01ad4063b4b961ab2ba2f410",
#     "tags": [],
#     "objects": [],
#     "frames": [],
#     "framesCount": 10
# }
classmethod from_json(data: dict, project_meta: supervisely_lib.project.project_meta.ProjectMeta, key_id_map: Optional[supervisely_lib.video_annotation.key_id_map.KeyIdMap] = None) → supervisely_lib.video_annotation.video_annotation.VideoAnnotation[source]

Convert a json dict to VideoAnnotation. Read more about Supervisely format.

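Parameters
  • data (dict) – VideoAnnotation in json format as a dict.

  • project_meta (ProjectMeta) – ProjectMeta object.

  • key_id_map (KeyIdMap, optional) – KeyIdMap object.
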
Returns

VideoAnnotation object

Return type

VideoAnnotation

Usage example
video_ann_json = {
    "size": {
        "height": 500,
        "width": 700
    },
    "tags": [],
    "objects": [],
    "frames": [],
    "framesCount": 1
}
meta = sly.ProjectMeta()
video_ann = sly.VideoAnnotation.from_json(video_ann_json, meta)
clone(img_size: Optional[Tuple[int, int]] = None, frames_count: Optional[int] = None, objects: Optional[supervisely_lib.video_annotation.video_object_collection.VideoObjectCollection] = None, frames: Optional[supervisely_lib.video_annotation.frame_collection.FrameCollection] = None, tags: Optional[supervisely_lib.video_annotation.video_tag_collection.VideoTagCollection] = None, description: Optional[str] = None) → supervisely_lib.video_annotation.video_annotation.VideoAnnotation[source]

Makes a copy of the VideoAnnotation, replacing any fields that are given; fields that are not given keep the values of the original VideoAnnotation.

Parameters
  • img_size (Tuple[int, int], optional) – Size of the image (height, width).

  • frames_count (int, optional) – Number of frames in VideoAnnotation.

  • objects (VideoObjectCollection, optional) – VideoObjectCollection object.

  • frames (FrameCollection, optional) – FrameCollection object.

  • tags (VideoTagCollection, optional) – VideoTagCollection object.

  • description (str, optional) – Video description.

Raises

TypeError, if img_size is not a tuple or a list

Usage example
height, width = 500, 700
frames_count = 1
video_ann = sly.VideoAnnotation((height, width), frames_count)

obj_class_car = sly.ObjClass('car', sly.Rectangle)
video_obj_car = sly.VideoObject(obj_class_car)
new_objects = sly.VideoObjectCollection([video_obj_car])
new_video_ann = video_ann.clone(objects=new_objects)
print(new_video_ann.to_json())
# Output: {
#     "size": {
#         "height": 500,
#         "width": 700
#     },
#     "description": "",
#     "key": "37f7d267864c4fd8b1a1a32f67e37f7d",
#     "tags": [],
#     "objects": [
#         {
#             "key": "27d4ba1aaee64930b2d0bfb7e8b53493",
#             "classTitle": "car",
#             "tags": []
#         }
#     ],
#     "frames": [],
#     "framesCount": 1
# }