2017-12-14 4 views
2

Verschachteltes JSON mit Python/Pandas parsen – Ich möchte diese JSON-Antwort parsen:

{ 
    "count":2, 
    "next":null, 
    "previous":null, 
    "results":[ 
     { 
     "id":123, 
     "type_vname":"Suspicious Remote Desktop", 
     "category":"LATERAL MOVEMENT", 
     "src_ip":"192.168.1.1", 
     "state":"fixed", 
     "description":null, 
     "t_score":70, 
     "c_score":70, 
     "first_timestamp":"2017-12-13T18:51:22Z", 
     "last_timestamp":"2017-12-13T18:51:22Z", 
     "detection_detail_set":[ 
      { 
       "id":1234567, 
       "description":"Suspicious Remote Desktop", 
       "dst_host_id":1234, 
       "dst_ip":"192.168.1.1", 
       "count":null, 
       "count_pos":null, 
       "dst_dns":null, 
       "dst_port":80, 
       "dst_geo":null, 
       "proto":null, 
       "first_timestamp":"2017-12-13T18:51:22Z", 
       "last_timestamp":"2017-12-13T18:51:22Z", 
       "total_bytes_sent":null, 
       "total_bytes_rcvd":null, 
       "url":"https://192.168.1.2/api/detection_details" 
      }, 
      { 
       "id":89123456, 
       "description":"Suspicious Remote Desktop", 
       "dst_host_id":5678, 
       "dst_ip":"192.168.1.1", 
       "count":null, 
       "count_pos":null, 
       "dst_dns":null, 
       "dst_port":80, 
       "dst_geo":null, 
       "proto":null, 
       "first_timestamp":"2017-12-13T18:50:18Z", 
       "last_timestamp":"2017-12-13T18:50:18Z", 
       "total_bytes_sent":null, 
       "total_bytes_rcvd":null, 
       "url":"https://192.168.1.2/api/detection_details" 
      } 
     ], 
     "dns_set":[ 

     ], 
     "relayed_comm_set":[ 

     ], 
     "sensor_luid":"abc1pdj", 
     "summary":{ 
      "internal_targets":1, 
      "anomalous_events":2, 
      "probable_owner":"user" 
     }, 
     "host":"https://192.168.1.2/api/detection_details", 
     "url":"https://192.168.1.2/api/detection_details", 
     "tags":[ 

     ], 
     "targets_key_asset":false, 
     "triage_rule_id":null 
     }, 
     { 
     "id":1235, 
     "type_vname":"Suspicious Remote Desktop", 
     "category":"LATERAL MOVEMENT", 
     "src_ip":"192.168.1.2", 
     "state":"fixed", 
     "description":null, 
     "t_score":70, 
     "c_score":70, 
     "first_timestamp":"2017-12-11T19:11:46Z", 
     "last_timestamp":"2017-12-11T19:11:46Z", 
     "detection_detail_set":[ 
      { 
       "id":123445, 
       "description":"Suspicious Remote Desktop", 
       "dst_host_id":4958, 
       "dst_ip":"192.168.1.2", 
       "count":null, 
       "count_pos":null, 
       "dst_dns":null, 
       "dst_port":80, 
       "dst_geo":null, 
       "proto":null, 
       "first_timestamp":"2017-12-11T19:11:46Z", 
       "last_timestamp":"2017-12-11T19:11:46Z", 
       "total_bytes_sent":null, 
       "total_bytes_rcvd":null, 
       "url":"https://192.168.1.2/api/detection_details" 
      }, 
      { 
       "id":1274857, 
       "description":"Suspicious Remote Desktop", 
       "dst_host_id":15423, 
       "dst_ip":"192.168.1.2", 
       "count":null, 
       "count_pos":null, 
       "dst_dns":null, 
       "dst_port":80, 
       "dst_geo":null, 
       "proto":null, 
       "first_timestamp":"2017-12-11T19:11:46Z", 
       "last_timestamp":"2017-12-11T19:11:46Z", 
       "total_bytes_sent":null, 
       "total_bytes_rcvd":null, 
       "url":"https://192.168.1.2/api/detection_details" 
      }, 
      { 
       "id":137847, 
       "description":"Suspicious Remote Desktop", 
       "dst_host_id":93238, 
       "dst_ip":"192.168.1.2", 
       "count":null, 
       "count_pos":null, 
       "dst_dns":null, 
       "dst_port":80, 
       "dst_geo":null, 
       "proto":null, 
       "first_timestamp":"2017-12-11T19:10:53Z", 
       "last_timestamp":"2017-12-11T19:10:53Z", 
       "total_bytes_sent":null, 
       "total_bytes_rcvd":null, 
       "url":"https://192.168.1.2/api/detection_details" 
      }, 
      { 
       "id":2376849874, 
       "description":"Suspicious Remote Desktop", 
       "dst_host_id":15423, 
       "dst_ip":"192.168.1.2", 
       "count":null, 
       "count_pos":null, 
       "dst_dns":null, 
       "dst_port":80, 
       "dst_geo":null, 
       "proto":null, 
       "first_timestamp":"2017-12-11T19:10:53Z", 
       "last_timestamp":"2017-12-11T19:10:53Z", 
       "total_bytes_sent":null, 
       "total_bytes_rcvd":null, 
       "url":"https://192.168.1.2/api/detection_details" 
      } 
     ], 
     "dns_set":[ 

     ], 
     "relayed_comm_set":[ 

     ], 
     "sensor_luid":"abcery", 
     "summary":{ 
      "internal_targets":1, 
      "anomalous_events":4, 
      "probable_owner":"user" 
     }, 
     "host":"https://192.168.1.2/api/detection_details", 
     "url":"https://192.168.1.2/api/detection_details", 
     "tags":[ 

     ], 
     "targets_key_asset":false, 
     "triage_rule_id":null 
     } 
    ] 
} 

Und zwar in einen DataFrame, sodass ich die JSON-Daten mit to_csv und den folgenden Spaltenüberschriften in eine CSV-Datei schreiben kann:

count 
next 
previous 
results_id 
results_type_vname 
results_category 
results_src_ip 
results_state 
results_description 
results_t_score 
results_c_score 
results_first_timestamp 
results_last_timestamp 
results_dns_set 
results_relayed_comm_set 
results_sensor_luid 
results_host 
results_url 
results_tags 
results_targets_key_asset 
results_triage_rule_id 
summary_internal_targets 
summary_anomalous_events 
summary_probable_owner 
detection_id 
detection_description 
detection_dst_host_id 
detection_dst_ip 
detection_count 
detection_count_pos 
detection_dst_dns 
detection_dst_port 
detection_dst_geo 
detection_proto 
detection_first_timestamp 
detection_last_timestamp 
detection_total_bytes_sent 
detection_total_bytes_rcvd 
detection_url 

Ich habe auf SO gesucht und daraufhin meinen eigenen Code geschrieben (die JSON-Antwort steht in 'data'):

import pandas as pd 
from pandas.io.json import json_normalize 

# Load the JSON response held in `data` into a DataFrame; the `results`
# column is what gets normalized below.
df = pd.DataFrame(data) 
# Expand every entry of each result's `detection_detail_set` list into its own
# row. The detail fields are prefixed with `results_`, while the parent
# result's `category` and `id` are carried along as unprefixed meta columns.
# Other parent-level fields are not requested here, so they do not appear in
# the output.
df = json_normalize(data=df['results'], record_path='detection_detail_set', 
          meta=['category', 'id'], record_prefix='results_', errors='ignore') 

# NOTE(review): head() with no argument keeps only the leading rows (5 by
# pandas default), so the CSV written below is truncated — confirm intended.
df = df.head() 

# Write the frame to Output.csv without the index column.
df.to_csv('Output.csv', index=False) 

Damit bekomme ich die folgenden Spaltenüberschriften (mit Daten) in der Ausgabe:

results_count 
results_count_pos 
results_description 
results_dst_dns 
results_dst_geo 
results_dst_host_id 
results_dst_ip 
results_dst_port 
results_first_timestamp 
results_id 
results_last_timestamp 
results_proto 
results_total_bytes_rcvd 
results_total_bytes_sent 
results_url 
category 
id 

Ich habe das Gefühl, auf halbem Weg zu sein. Ich habe verschiedene Kombinationen und Ratschläge aus anderen SO-Posts ausprobiert, um die verbleibenden Daten zu erhalten. Nichts hat bisher funktioniert. Ich weiß, dass das Problem, auf das ich stoße, auf der Verschachtelung beruht; ich muss nur einen Weg finden, das gewünschte Ergebnis zu erzielen. Ich schätze Ihre Hilfe!

Antwort

0

Das scheint die richtige Idee zu sein; Sie müssen nur noch die results-Ebene mit der entpackten detection-Ebene zusammenführen:

# Build one wide table: the flattened top-level `results` records joined with
# one row per entry of their `detection_detail_set` list.
#
# Assumes `df` is still the frame built from the raw response
# (`pd.DataFrame(data)`, i.e. it has a `results` column) and that
# `json_normalize` is in scope — TODO confirm against the surrounding script.

# Flatten the parent records. The `detection_detail_set` lists are dropped
# here because they are expanded separately below. `axis` is passed by keyword:
# pandas 2.0 no longer accepts it positionally.
results = (json_normalize(data=df["results"], errors="ignore")
           .drop("detection_detail_set", axis=1)
           .add_prefix("results_"))
# The nested `summary` dict is flattened to dotted names such as
# `results_summary.internal_targets`; strip the `summary.` part.
# `regex=True` is explicit because pandas 2.0 treats the pattern as a literal
# string by default, which would leave the escaped dot unmatched.
results.columns = results.columns.str.replace(
    r"results_summary\.", "results_", regex=True)

# One row per detection detail, prefixed with `detection_`; the parent's
# `category` and `id` are kept (unprefixed) as join keys.
detection = json_normalize(data=df['results'], meta=['category', 'id'],
                           record_path='detection_detail_set',
                           record_prefix="detection_", errors='ignore')

# Left join so that a result without any detection detail still yields a row.
master = results.merge(detection, how="left",
                       left_on=["results_id", "results_category"],
                       right_on=["id", "category"])

master.columns 
Index(['results_c_score', 'results_category', 'results_description', 
     'results_dns_set', 'results_first_timestamp', 'results_host', 
     'results_id', 'results_last_timestamp', 'results_relayed_comm_set', 
     'results_sensor_luid', 'results_src_ip', 'results_state', 
     'results_anomalous_events', 'results_internal_targets', 
     'results_probable_owner', 'results_t_score', 'results_tags', 
     'results_targets_key_asset', 'results_triage_rule_id', 
     'results_type_vname', 'results_url', 'detection_count', 
     'detection_count_pos', 'detection_description', 'detection_dst_dns', 
     'detection_dst_geo', 'detection_dst_host_id', 'detection_dst_ip', 
     'detection_dst_port', 'detection_first_timestamp', 'detection_id', 
     'detection_last_timestamp', 'detection_proto', 
     'detection_total_bytes_rcvd', 'detection_total_bytes_sent', 
     'detection_url', 'category', 'id'], 
     dtype='object') 
+0

Sieht aus, als hätte das funktioniert! Vielen Dank! – pysec1

+0

Gern geschehen! –