In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json

In [2]:
# Path of the metadata file (one JSON object per line) that the loading cells read.
multimodal_data_info_file_path = "multimodal_data_info.json"

In [3]:
def read_multimodal_data_information_json_file(json_file_path="multimodal_data_info.json"):
    """
    Load video metadata records from a JSON Lines file (one JSON object per line).

    Blank lines are skipped. A line that is not valid JSON, or that does not
    decode to an object containing every required key, is reported on stdout
    and skipped; reading then continues with the next line, so a single bad
    line no longer silently truncates the result.

    :param json_file_path: path of the UTF-8 encoded JSON Lines file
    :return: multimodal_data_information_list
            [{'mp4_id': '97930081', 'mp4_download_url': ...'video_label': 'Military'},
            {'mp4_id': '64413672', 'mp4_download_url': ... 'video_label': 'Military'}]
    :raises OSError: if the file cannot be opened
    :raises UnicodeDecodeError: if the file content is not valid UTF-8
    """
    required_keys = ('mp4_id', 'video_label', 'mp4_time', 'mp4_download_url',
                     'mp4_background_image_url', 'mp4_txt_brief')

    def check_data(line_dict):
        # A JSON value that is not an object (list, number, ...) can never be a
        # complete record, so reject it up front instead of raising on `in`.
        return isinstance(line_dict, dict) and all(key in line_dict for key in required_keys)

    multimodal_data_information_list = list()
    # The `with` block closes the file on every exit path; no explicit close needed.
    with open(json_file_path, 'r', encoding='utf-8') as f:
        for line_number, line in enumerate(f, start=1):
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            try:
                line_dict = json.loads(line)
            except json.JSONDecodeError as error:
                # Report and keep going rather than dropping the rest of the file.
                print("invalid json on line {}: {}".format(line_number, error))
                continue
            if check_data(line_dict):
                multimodal_data_information_list.append(line_dict)
            else:
                print("incomplete data:")
                print(line_dict)
    return multimodal_data_information_list

In [4]:
# Load every complete record from the configured metadata file.
multimodal_data_information_list = read_multimodal_data_information_json_file(
    json_file_path=multimodal_data_info_file_path
)

In [5]:
# Number of records that passed the completeness check while loading.
len(multimodal_data_information_list)

562342

In [6]:
# Show the first three records to sanity-check the parsed structure.
multimodal_data_information_list[:3]

[{'mp4_id': '75265848',
  'mp4_download_url': 'https://p5-v1.xpccdn.com/075265848_main_xl.mp4',
  'mp4_time': '0:13',
  'mp4_background_image_url': 'https://p5-i1.xpccdn.com/075265848_iconl.jpeg',
  'mp4_txt_brief': ' Old antique German military rifle',
  'video_label': 'Military'},
 {'mp4_id': '44566064',
  'mp4_download_url': 'https://p5-v1.xpccdn.com/044566064_main_xl.mp4',
  'mp4_time': '0:09',
  'mp4_background_image_url': 'https://p5-i1.xpccdn.com/044566064_iconl.jpeg',
  'mp4_txt_brief': ' quadcopter aerial drone',
  'video_label': 'Military'},
 {'mp4_id': '62447549',
  'mp4_download_url': 'https://p5-v1.xpccdn.com/062447549_main_xl.mp4',
  'mp4_time': '0:06',
  'mp4_background_image_url': 'https://p5-i1.xpccdn.com/062447549_iconl.jpeg',
  'mp4_txt_brief': ' Firearm dis-assembly for cleaning and safety check of handheld gun',
  'video_label': 'Military'}]

In [7]:
def multimodal_data_json_file_to_datafram(json_file_path="multimodal_data_info.json"):
    """
    Read the metadata records from a JSON Lines file into a pandas DataFrame.

    :param json_file_path: path passed on to read_multimodal_data_information_json_file
    :return: pandas.DataFrame with one row per loaded record and the columns
             mp4_id, video_label, mp4_time, mp4_download_url,
             mp4_background_image_url, mp4_txt_brief (in that order)
    """
    column_names = ('mp4_id', 'video_label', 'mp4_time',
                    'mp4_download_url', 'mp4_background_image_url', 'mp4_txt_brief')

    records = read_multimodal_data_information_json_file(json_file_path)

    # One list of values per column; any key outside column_names is ignored.
    column_values = {
        name: [record[name] for record in records]
        for name in column_names
    }

    return pd.DataFrame(column_values)

In [8]:
# Build the DataFrame from the same configured path the list-loading cell uses,
# so changing the path in the configuration cell affects both loads.
multimodal_data_information_datafram = multimodal_data_json_file_to_datafram(
    json_file_path=multimodal_data_info_file_path
)

In [9]:
# Preview the first rows of the DataFrame.
multimodal_data_information_datafram.head()

Unnamed: 0,mp4_id,video_label,mp4_time,mp4_download_url,mp4_background_image_url,mp4_txt_brief
0,75265848,Military,0:13,https://p5-v1.xpccdn.com/075265848_main_xl.mp4,https://p5-i1.xpccdn.com/075265848_iconl.jpeg,Old antique German military rifle
1,44566064,Military,0:09,https://p5-v1.xpccdn.com/044566064_main_xl.mp4,https://p5-i1.xpccdn.com/044566064_iconl.jpeg,quadcopter aerial drone
2,62447549,Military,0:06,https://p5-v1.xpccdn.com/062447549_main_xl.mp4,https://p5-i1.xpccdn.com/062447549_iconl.jpeg,Firearm dis-assembly for cleaning and safety ...
3,42966432,Military,0:08,https://p5-v1.xpccdn.com/042966432_main_xl.mp4,https://p5-i1.xpccdn.com/042966432_iconl.jpeg,Kalashnikov deadly weapon
4,103424272,Military,0:13,https://p5-v1.xpccdn.com/103424272_main_xl.mp4,https://p5-i1.xpccdn.com/103424272_iconl.jpeg,Rows of ammunition in front of an animated Le...


In [10]:
# Per-column summary (count / unique / top / freq in the recorded output).
# NOTE(review): the recorded output shows fewer unique mp4_id values than rows,
# so the file appears to contain duplicate records — confirm whether they
# should be de-duplicated before further analysis.
multimodal_data_information_datafram.describe()

Unnamed: 0,mp4_id,video_label,mp4_time,mp4_download_url,mp4_background_image_url,mp4_txt_brief
count,562342,562342,562342,562342,562342,562342
unique,499607,31,184,499607,499607,343020
top,88460884,Alpha Channel,0:10,https://p5-v1.xpccdn.com/023726153_main_xl.mp4,https://p5-i1.xpccdn.com/088460884_iconl.jpeg,Intro Background Texture Render Animation Col...
freq,9,19200,49660,9,9,10974
