# Source code for pylabel.exporter

"""PyLabel currently supports exporting annotations in COCO, YOLO, and VOC PASCAL formats."""

import json
from typing import List
import pandas as pd
import numpy as np
import xml.etree.ElementTree as ET
import xml.dom.minidom
import os
import yaml
import shutil
from pylabel.shared import _ReindexCatIds
from tqdm import tqdm
import csv

from pathlib import PurePath, Path


class Export:
    """Writes the annotations of a dataset to disk in VOC, YOLOv5 or COCO format.

    The exporter reads ``dataset.df`` (one row per annotation) and, depending
    on the method, ``dataset.path_to_annotations``, ``dataset.name`` and
    ``dataset.analyze``.
    """

    def __init__(self, dataset=None):
        self.dataset = dataset

    @staticmethod
    def _voc_annotation_xml(rows, segmented=True, path=True, folder=True):
        """Build the pretty-printed VOC XML document for the rows of one image.

        Args:
            rows (pd.DataFrame): All dataframe rows that share one img_filename.
                Image-level fields are read from the first row.
            segmented (bool): Include the <segmented> tag when its value is not empty.
            path (bool): Include the <path> tag when its value is not empty.
            folder (bool): Include the <folder> tag when its value is not empty.

        Returns:
            The XML document as a string, as produced by minidom's toprettyxml().
        """

        def add(parent, tag, value):
            # ElementTree escapes &, < and > when serializing, so values such as
            # "cats&dogs.jpg" no longer produce an unparsable document.
            child = ET.SubElement(parent, tag)
            child.text = str(value)
            return child

        first = rows.iloc[0]
        root = ET.Element("annotation")

        folder_value = str(first["img_folder"])
        if folder and folder_value != "":
            add(root, "folder", folder_value)

        add(root, "filename", first["img_filename"])

        path_value = str(first["img_path"])
        if path and path_value != "":
            add(root, "path", path_value)

        size = ET.SubElement(root, "size")
        add(size, "width", first["img_width"])
        add(size, "height", first["img_height"])
        add(size, "depth", first["img_depth"])

        segmented_value = str(first["ann_segmented"])
        if segmented and segmented_value != "":
            add(root, "segmented", segmented_value)

        for _, row in rows.iterrows():
            cat_id = row["cat_id"]
            # A row without a category id describes an image without annotations,
            # so no <object> is written for it.
            if pd.isnull(cat_id) or cat_id == "":
                continue
            obj = ET.SubElement(root, "object")
            add(obj, "name", row["cat_name"])
            add(obj, "pose", row["ann_pose"])
            add(obj, "truncated", row["ann_truncated"])
            add(obj, "difficult", row["ann_difficult"])
            box = ET.SubElement(obj, "bndbox")
            for corner in ("xmin", "xmax", "ymin", "ymax"):
                add(box, corner, int(row["ann_bbox_" + corner]))

        raw_xml = ET.tostring(root, encoding="unicode")
        return xml.dom.minidom.parseString(raw_xml).toprettyxml()

    def ExportToVoc(
        self,
        output_path=None,
        segmented_=False,
        path_=False,
        database_=False,
        folder_=False,
        occluded_=False,
    ):
        """Writes annotation files to disk in VOC XML format and returns path to files.

        By default, tags with empty values will not be included in the XML output.
        You can optionally choose to include them if they are required for your solution.

        Args:
            output_path (str):
                This is where the annotation files will be written. If not specified
                then the .path_to_annotations property of the dataset object is used.
            segmented_ (bool):
                Defaults to False. Set to true to include this field in the XML schema
                of the output files.
            path_ (bool):
                Defaults to False. Set to true to include this field in the XML schema
                of the output files.
            database_ (bool):
                Defaults to False. Accepted for compatibility; no <source>/<database>
                tag is currently written.
            folder_ (bool):
                Defaults to False. Set to true to include this field in the XML schema
                of the output files.
            occluded_ (bool):
                Defaults to False. Accepted for compatibility; no <occluded> tag is
                currently written.

        Returns:
            A list with 1 or more paths (strings) to annotations files, one per image,
            in order of first appearance of the image in the dataframe.

        Example:
            >>> dataset.export.ExportToVoc()
            ['data/voc_annotations/000000000322.xml', ...]

        """
        ds = self.dataset

        if output_path is None:
            output_path = ds.path_to_annotations
        os.makedirs(output_path, exist_ok=True)

        output_file_paths = []

        # Group once instead of filtering the whole dataframe for every image.
        grouped = ds.df.groupby("img_filename", sort=False)
        with tqdm(desc="Exporting VOC files...", total=grouped.ngroups) as pbar:
            for img_filename, rows in grouped:
                xml_name = str(Path(img_filename).with_suffix(".xml"))
                xml_path = str(Path(output_path, xml_name))
                xml_text = self._voc_annotation_xml(
                    rows, segmented=segmented_, path=path_, folder=folder_
                )
                # The XML declaration carries no encoding, which means UTF-8.
                with open(xml_path, "w", encoding="utf-8") as f:
                    f.write(xml_text)
                output_file_paths.append(xml_path)
                pbar.update()

        return output_file_paths

    @staticmethod
    def _df_to_csv(
        df: pd.DataFrame,
        file_path: str,
        sep: str = " ",
        float_format: str = "%0.4f",
        columns: List[str] = None,
    ):
        """Write the selected columns of df to file_path, one line per row.

        With the pandas to_csv method the output for a list (e.g. the keypoints)
        must include a quote character or an escape character. To avoid both of
        those, this custom writer flattens sequence cells into the row.

        Args:
            df: The dataframe to write.
            file_path: Destination file; it is overwritten.
            sep: Separator placed between the values of a row.
            float_format: %-style format applied to float values. NaN is written
                as an empty string.
            columns: Columns to write, in order. Defaults to all columns of df.
        """
        if columns is None:
            columns = df.columns

        def _format_value(value):
            if isinstance(value, (float, np.floating)):
                return "" if np.isnan(value) else float_format % value
            return str(value)

        lines = []
        for row in df[columns].itertuples(index=False, name=None):
            cells = []
            for value in row:
                if isinstance(value, (list, tuple, np.ndarray)):
                    # Sequence cells (e.g. keypoints) are flattened in place.
                    cells.extend(_format_value(item) for item in value)
                else:
                    cells.append(_format_value(value))
            lines.append(sep.join(cells) + "\n")

        with open(file_path, "w", encoding="utf-8") as f:
            f.writelines(lines)

    def ExportToYoloV5(
        self,
        output_path="training/labels",
        yaml_file="dataset.yaml",
        copy_images=False,
        use_splits=False,
        cat_id_index=None,
        segmentation=False,
        keypoints=False,
    ):
        """Writes annotation files to disk in YOLOv5 format and returns the paths to files.

        Args:
            output_path (str):
                This is where the annotation files will be written. If None, the path
                is derived from the .path_to_annotations property of the dataset and
                the img_folder of the first annotated row.
                If you are exporting images to train a model, the recommended path to
                use is 'training/labels'.
            yaml_file (str):
                If a file name (string) is provided, a YOLOv5 YAML file will be created
                with entries for the files and classes in this dataset. It will be
                created in the parent of the output_path directory.
                The recommended name for the YAML file is 'dataset.yaml'.
            copy_images (boolean):
                If True, then the annotated images will be copied to a directory named
                'images' next to the labels directory.
            use_splits (boolean):
                If True, then the images and annotations will be written into
                directories based on the values in the split column. For example, if a
                row has the value split = "train" then the annotations for that row are
                written to the directory /train. If a YAML file is specified then the
                YAML file will use the splits to specify the folders used for the
                train, val, and test datasets.
            cat_id_index (int):
                Reindex the cat_id values so that they start from an int (usually 0 or
                1) and then increment continuously. It's useful if the cat_ids are not
                continuous in the original dataset. Yolo requires the set of
                annotations to start at 0 when training a model.
            segmentation (boolean):
                If true, then segmentation annotations will be exported instead of
                bounding box annotations. Rows without a segmentation polygon are
                skipped, so an image without any produces an empty label file.
            keypoints (boolean):
                If true, then keypoint annotations will be exported as well as bounding
                box annotations. It is not possible to export both segmentation and
                keypoint annotations at the same time in YOLO format.
                Keypoints are triplets of (x, y, visibility), see e.g.
                https://cocodataset.org/#format-data
                x and y are divided by the image width and height; rows without a
                keypoint sequence get no keypoint values.

        Returns:
            A list with 1 or more paths (strings) to annotations files. If a YAML file
            is created then the first item in the list will be the path to the YAML file.

        Raises:
            AssertionError: If both segmentation and keypoints are requested, or
                cat_id_index is not an int.
            FileNotFoundError: If copy_images is True and a source image is missing.

        Examples:
            >>> dataset.export.ExportToYoloV5(output_path='training/labels',
            >>>     yaml_file='dataset.yaml', cat_id_index=0)
            ['training/dataset.yaml', 'training/labels/frame_0002.txt', ...]

        """
        ds = self.dataset

        assert not (segmentation and keypoints), "Only one of segmentation and keypoints can be exported in YOLO format"

        # Inspired by https://github.com/aws-samples/groundtruth-object-detection/blob/master/create_annot.py
        yolo_dataset = ds.df.copy(deep=True)
        # Empty strings are easier to work with than NaN when building paths.
        yolo_dataset["split"] = yolo_dataset["split"].fillna("")

        # Drop rows that are not annotated. An image with zero objects can still
        # count as annotated; it gets an empty label file.
        yolo_dataset = yolo_dataset.loc[yolo_dataset["annotated"] == 1].copy()

        yolo_dataset["cat_id"] = yolo_dataset["cat_id"].replace(
            r"^\s*$", np.nan, regex=True
        )
        # The converted series is discarded; the call only raises early when a
        # cat_id value is not numeric.
        pd.to_numeric(yolo_dataset["cat_id"])

        if cat_id_index is not None:
            assert isinstance(cat_id_index, int), "cat_id_index must be an int."
            _ReindexCatIds(yolo_dataset, cat_id_index)

        # Resolve the default before any path is derived from output_path.
        if output_path is None:
            output_path = str(
                PurePath(ds.path_to_annotations, yolo_dataset.iloc[0].img_folder)
            )

        # All other paths are derived from the labels directory.
        labels_dir = PurePath(output_path)
        path_dict = {}
        path_dict["label_path"] = output_path
        # The /images directory should be next to the /labels directory
        path_dict["image_path"] = str(PurePath(labels_dir.parent, "images"))
        # The root directory is the parent of the /labels and /images directories
        path_dict["root_path"] = str(PurePath(labels_dir.parent))
        # The YAML file should be in the root directory
        path_dict["yaml_path"] = str(PurePath(path_dict["root_path"], yaml_file))
        # The root directory will usually be next to the yolov5 directory. If this
        # default does not match the user's environment the YAML file can be edited.
        path_dict["root_path_from_yolo_dir"] = str(PurePath("../"))

        if copy_images:
            Path(path_dict["image_path"]).mkdir(parents=True, exist_ok=True)

        # Convert empty bbox coordinates to nan to avoid math errors
        for column in (
            "ann_bbox_xmin",
            "ann_bbox_ymin",
            "ann_bbox_width",
            "ann_bbox_height",
        ):
            yolo_dataset[column] = yolo_dataset[column].replace(
                r"^\s*$", np.nan, regex=True
            )

        if not segmentation:
            yolo_dataset["center_x_scaled"] = (
                yolo_dataset["ann_bbox_xmin"] + (yolo_dataset["ann_bbox_width"] * 0.5)
            ) / yolo_dataset["img_width"]
            yolo_dataset["center_y_scaled"] = (
                yolo_dataset["ann_bbox_ymin"] + (yolo_dataset["ann_bbox_height"] * 0.5)
            ) / yolo_dataset["img_height"]
            yolo_dataset["width_scaled"] = (
                yolo_dataset["ann_bbox_width"] / yolo_dataset["img_width"]
            )
            yolo_dataset["height_scaled"] = (
                yolo_dataset["ann_bbox_height"] / yolo_dataset["img_height"]
            )

        if keypoints:
            # Built positionally: after the filter above the index labels are no
            # longer guaranteed to be 0..n-1, so they cannot index a plain list.
            scaled_keypoints = []
            for img_width, img_height, raw_keypoints in zip(
                yolo_dataset["img_width"],
                yolo_dataset["img_height"],
                yolo_dataset["ann_keypoints"],
            ):
                scaled = []
                if isinstance(raw_keypoints, (list, tuple, np.ndarray)):
                    for position, value in enumerate(raw_keypoints):
                        if position % 3 == 0:  # x coordinate
                            scaled.append(value / img_width)
                        elif position % 3 == 1:  # y coordinate
                            scaled.append(value / img_height)
                        else:  # visibility
                            scaled.append(value)
                scaled_keypoints.append(scaled)
            yolo_dataset["keypoints_scaled"] = pd.Series(
                scaled_keypoints, index=yolo_dataset.index, dtype=object
            )

        dest_folder = output_path
        os.makedirs(dest_folder, exist_ok=True)

        output_file_paths = []

        grouped = yolo_dataset.groupby("img_filename", sort=False)
        with tqdm(desc="Exporting YOLO files...", total=grouped.ngroups) as pbar:
            for img_filename, df_single_img_annots in grouped:
                basename, _ = os.path.splitext(img_filename)
                annot_txt_file = basename + ".txt"

                # The split values should be train, val, test or ''
                if use_splits:
                    split_dir = df_single_img_annots.iloc[0]["split"]
                else:
                    split_dir = ""
                destination = str(PurePath(dest_folder, split_dir, annot_txt_file))
                Path(dest_folder, split_dir).mkdir(parents=True, exist_ok=True)

                if not segmentation:
                    columns = [
                        "cat_id",
                        "center_x_scaled",
                        "center_y_scaled",
                        "width_scaled",
                        "height_scaled",
                    ]
                    if keypoints:
                        columns.append("keypoints_scaled")
                    self._df_to_csv(
                        df=df_single_img_annots,
                        file_path=destination,
                        sep=" ",
                        float_format="%.4f",
                        columns=columns,
                    )
                else:
                    # One file per image, one line per annotation
                    with open(destination, "w") as file:
                        for _, annot in df_single_img_annots.iterrows():
                            polygons = annot["ann_segmentation"]
                            if not isinstance(polygons, (list, tuple)) or not polygons:
                                continue
                            values = [str(annot["cat_id"])]
                            # Only the first polygon is exported. Even positions are
                            # x values (divide by width), odd positions are y values
                            # (divide by height).
                            for position, coordinate in enumerate(polygons[0]):
                                if position % 2 == 0:
                                    values.append(str(coordinate / annot["img_width"]))
                                else:
                                    values.append(str(coordinate / annot["img_height"]))
                            file.write(" ".join(values) + "\n")

                output_file_paths.append(destination)

                if copy_images:
                    source_image_path = str(
                        Path(
                            ds.path_to_annotations,
                            df_single_img_annots.iloc[0].img_folder,
                            df_single_img_annots.iloc[0].img_filename,
                        )
                    )
                    if not Path(source_image_path).is_file():
                        raise FileNotFoundError(
                            f"File does not exist: {source_image_path}. Check img_folder column values."
                        )
                    Path(path_dict["image_path"], split_dir).mkdir(
                        parents=True, exist_ok=True
                    )
                    shutil.copy(
                        str(source_image_path),
                        str(PurePath(path_dict["image_path"], split_dir, img_filename)),
                    )
                pbar.update()

        if yaml_file:
            splits = set(yolo_dataset["split"])

            dict_file = {}
            dict_file["path"] = path_dict["root_path_from_yolo_dir"]

            if use_splits and "train" in splits:
                dict_file["train"] = str(PurePath(path_dict["image_path"], "train"))
            else:
                dict_file["train"] = path_dict["image_path"]

            if use_splits and "val" in splits:
                dict_file["val"] = str(PurePath(path_dict["image_path"], "val"))
            else:
                # If there is no val split, use the train split as the val split
                dict_file["val"] = dict_file["train"]

            # The test entry is only written when a test split exists
            if use_splits and "test" in splits:
                dict_file["test"] = str(PurePath(path_dict["image_path"], "test"))

            dict_file["nc"] = ds.analyze.num_classes
            dict_file["names"] = ds.analyze.classes

            with open(path_dict["yaml_path"], "w") as file:
                yaml.dump(dict_file, file, encoding="utf-8", allow_unicode=True)
            output_file_paths = [path_dict["yaml_path"]] + output_file_paths

        return output_file_paths

    @staticmethod
    def _to_json_records(records):
        """Convert a list of dicts into plain JSON-compatible Python objects.

        The conversion goes through pandas' JSON writer, so NaN becomes None and
        values the writer cannot encode are passed through str().
        """
        encoded = pd.Series(records, dtype=object).to_json(
            orient="split", default_handler=str
        )
        return json.loads(encoded)["data"]

    def ExportToCoco(self, output_path=None, cat_id_index=None):
        """
        Writes COCO annotation files to disk (in JSON format) and returns the path to files.

        Args:
            output_path (str):
                This is where the annotation files will be written. If not specified
                then the path will be derived from the path_to_annotations and name
                properties of the dataset object.
            cat_id_index (int):
                Reindex the cat_id values so that they start from an int (usually 0 or
                1) and then increment continuously. It's useful if the cat_ids are not
                continuous in the original dataset. Some models like Yolo require
                starting from 0 and others like Detectron require starting from 1.

        Returns:
            A list with 1 or more paths (strings) to annotations files.

        Raises:
            AssertionError: If cat_id_index is given and is not an int.
            TypeError: If a keypoints value is neither a list nor a numpy array.

        Example:
            >>> dataset.export.ExportToCoco()
            ['data/labels/dataset.json']

        """
        # Work on a copy so the export transformations don't change the dataset
        df = self.dataset.df.copy(deep=True)
        # Replace empty string values with NaN
        df = df.replace(r"^\s*$", np.nan, regex=True)
        # The converted series is discarded; the call only raises early when a
        # cat_id value is not numeric.
        pd.to_numeric(df["cat_id"])

        df["ann_iscrowd"] = df["ann_iscrowd"].fillna(0)

        if cat_id_index is not None:
            assert isinstance(cat_id_index, int), "cat_id_index must be an int."
            _ReindexCatIds(df, cat_id_index)

        has_keypoints = "ann_keypoints" in df.columns

        images = []
        annotations = []
        categories = []
        seen_image_ids = set()
        seen_category_ids = set()

        with tqdm(desc="Exporting to COCO file...", total=df.shape[0]) as pbar:
            for row_label, row in df.iterrows():
                # Each image is listed once; rows without an image id are skipped.
                image_id = row["img_id"]
                if not pd.isna(image_id) and image_id not in seen_image_ids:
                    seen_image_ids.add(image_id)
                    images.append(
                        {
                            "id": image_id,
                            "folder": row["img_folder"],
                            "file_name": row["img_filename"],
                            "path": row["img_path"],
                            "width": row["img_width"],
                            "height": row["img_height"],
                            "depth": row["img_depth"],
                        }
                    )

                # A blank category id means there is no annotation on this row.
                if not pd.isna(row["cat_id"]):
                    category_id = int(row["cat_id"])
                    annotation = {
                        "image_id": image_id,
                        "id": row_label,
                        "segmented": row["ann_segmented"],
                        "bbox": [
                            row["ann_bbox_xmin"],
                            row["ann_bbox_ymin"],
                            row["ann_bbox_width"],
                            row["ann_bbox_height"],
                        ],
                        "area": row["ann_area"],
                        "segmentation": row["ann_segmentation"],
                        "iscrowd": row["ann_iscrowd"],
                        "pose": row["ann_pose"],
                        "truncated": row["ann_truncated"],
                        "category_id": category_id,
                        "difficult": row["ann_difficult"],
                    }

                    # include keypoints, if available
                    if has_keypoints and not np.isnan(row["ann_keypoints"]).all():
                        keypoints = row["ann_keypoints"]
                        # 3 numbers per keypoint: x, y, visibility
                        if isinstance(keypoints, list):
                            n_keypoints = int(len(keypoints) / 3)
                        elif isinstance(keypoints, np.ndarray):
                            n_keypoints = int(keypoints.size / 3)
                        else:
                            raise TypeError('The keypoints array is expected to be either a list or a numpy array')
                        annotation["num_keypoints"] = n_keypoints
                        annotation["keypoints"] = keypoints

                    annotations.append(annotation)

                    if category_id not in seen_category_ids:
                        seen_category_ids.add(category_id)
                        categories.append(
                            {
                                "id": category_id,
                                "name": row["cat_name"],
                                "supercategory": row["cat_supercategory"],
                            }
                        )

                pbar.update()

        json_output = {
            "images": self._to_json_records(images),
            "annotations": self._to_json_records(annotations),
            "categories": self._to_json_records(categories),
        }

        if output_path is None:
            output_path = Path(
                self.dataset.path_to_annotations, (self.dataset.name + ".json")
            )

        with open(output_path, "w") as outfile:
            json.dump(obj=json_output, fp=outfile, indent=4)
        return [str(output_path)]