extract data from JSON string

The name of the pictureThe name of the pictureThe name of the pictureClash Royale CLAN TAG#URR8PPP











up vote
2
down vote

favorite












I've got to extract a license plate digit and its associated confidence from a JSON string that looks like:






"response":
"container":
"id": "0df307bc-06b2-45cf-b7ff-ce07fd04e04d",
"timestamp": "2018-Jul-10 17:34:27.448632"
,
"id": "00000002-0000-0000-0000-000000000015"
,
"frames":
"frame":
"id": "5583",
"timestamp": "2016-Nov-30 13:05:27",
"lps":
"lp":
"licenseplate": "15451BBL",
"text": "15451BBL",
"wtext": "15451BBL",
"confidence": "20",
"bkcolor": "16777215",
"color": "16777215",
"type": "0",
"ntip": "11",
"cct_country_short": "",
"cct_state_short": "",
"tips":
"tip":
"poly":
"p": "x": "1094", "y": "643" ,
"p": "x": "1099", "y": "643" ,
"p": "x": "1099", "y": "667" ,
"p": "x": "1094", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1103", "y": "642" ,
"p": "x": "1113", "y": "642" ,
"p": "x": "1112", "y": "667" ,
"p": "x": "1102", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "89"
,
"tip":
"poly":
"p": "x": "1112", "y": "640" ,
"p": "x": "1122", "y": "640" ,
"p": "x": "1122", "y": "666" ,
"p": "x": "1112", "y": "666"
,
"bkcolor": "16777215",
"color": "0",
"code": "52",
"code_ascii": "4",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1123", "y": "640" ,
"p": "x": "1132", "y": "640" ,
"p": "x": "1131", "y": "665" ,
"p": "x": "1123", "y": "665"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1134", "y": "640" ,
"p": "x": "1139", "y": "640" ,
"p": "x": "1139", "y": "664" ,
"p": "x": "1133", "y": "664"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "77"
,
"tip":
"poly":
"p": "x": "1154", "y": "639" ,
"p": "x": "1163", "y": "639" ,
"p": "x": "1163", "y": "663" ,
"p": "x": "1153", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1164", "y": "638" ,
"p": "x": "1173", "y": "638" ,
"p": "x": "1173", "y": "663" ,
"p": "x": "1163", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "94"
,
"tip":
"poly":
"p": "x": "1191", "y": "637" ,
"p": "x": "1206", "y": "636" ,
"p": "x": "1205", "y": "660" ,
"p": "x": "1190", "y": "661"
,
"bkcolor": "16777215",
"color": "0",
"code": "76",
"code_ascii": "L",
"confidence": "34"
,
"tip":
"poly":
"p": "x": "1103", "y": "655" ,
"p": "x": "1111", "y": "655" ,
"p": "x": "1111", "y": "667" ,
"p": "x": "1103", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "74",
"code_ascii": "J",
"confidence": "57"
,
"tip":
"poly":
"p": "x": "1103", "y": "655" ,
"p": "x": "1111", "y": "655" ,
"p": "x": "1111", "y": "667" ,
"p": "x": "1103", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "74",
"code_ascii": "J",
"confidence": "57"
,
"tip":
"poly":
"p": "x": "1176", "y": "638" ,
"p": "x": "1185", "y": "637" ,
"p": "x": "1184", "y": "661" ,
"p": "x": "1175", "y": "662"
,
"bkcolor": "16777215",
"color": "0",
"code": "52",
"code_ascii": "4",
"confidence": "7"

,
"ncharacter": "8",
"characters":
"characater":
"poly":
"p": "x": "1094", "y": "643" ,
"p": "x": "1099", "y": "643" ,
"p": "x": "1099", "y": "667" ,
"p": "x": "1094", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1103", "y": "642" ,
"p": "x": "1113", "y": "642" ,
"p": "x": "1112", "y": "667" ,
"p": "x": "1102", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "89"
,
"characater":
"poly":
"p": "x": "1112", "y": "640" ,
"p": "x": "1122", "y": "640" ,
"p": "x": "1122", "y": "666" ,
"p": "x": "1112", "y": "666"
,
"bkcolor": "16777215",
"color": "0",
"code": "52",
"code_ascii": "4",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1123", "y": "640" ,
"p": "x": "1132", "y": "640" ,
"p": "x": "1131", "y": "665" ,
"p": "x": "1123", "y": "665"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1134", "y": "640" ,
"p": "x": "1139", "y": "640" ,
"p": "x": "1139", "y": "664" ,
"p": "x": "1133", "y": "664"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "77"
,
"characater":
"poly":
"p": "x": "1154", "y": "639" ,
"p": "x": "1163", "y": "639" ,
"p": "x": "1163", "y": "663" ,
"p": "x": "1153", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1164", "y": "638" ,
"p": "x": "1173", "y": "638" ,
"p": "x": "1173", "y": "663" ,
"p": "x": "1163", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "94"
,
"characater":
"poly":
"p": "x": "1191", "y": "637" ,
"p": "x": "1206", "y": "636" ,
"p": "x": "1205", "y": "660" ,
"p": "x": "1190", "y": "661"
,
"bkcolor": "16777215",
"color": "0",
"code": "76",
"code_ascii": "L",
"confidence": "34"

,
"det_time_us": "1104009",
"poly":
"p": "x": "1088", "y": "642" ,
"p": "x": "1210", "y": "634" ,
"p": "x": "1210", "y": "661" ,
"p": "x": "1087", "y": "669"


,
"det_time_us": "1710270"







I've got something like:



$ jq -r '.frames.frame.lps.lp|.characters.characater.code_ascii,.characters.characater.confidence' test.json


but it only returns one single letter and its confidence score...



Question



  • How can I get all letters and the associated score back?

Output



I expect the output to be like:



1 97, 5 89, 4 97, 5 97, 1 77, B 97, B 94, L 34, J 57, J 57, 4 7, 1 97, 5 89, 4 97, 5 97, 1 77, B 97, B 94, L 34


NOTE: format can be different, this is just indicating the data I wanted to extract.



copy & paste



input file "test.json"



"response":"container":"id":"41d6efcb-24d6-490d-8880-762255519b5f","timestamp":"2018-Jul-11 19:51:06.461665","id":"00000002-0000-0000-0000-000000000015","frames":"frame":"id":"5583","timestamp":"2016-Nov-30 13:05:27","lps":"lp":"licenseplate":"15451BBL","text":"15451BBL","wtext":"15451BBL","confidence":"20","bkcolor":"16777215","color":"16777215","type":"0","ntip":"11","cct_country_short":"","cct_state_short":"","tips":"tip":"poly":"p":"x":"1094","y":"643","p":"x":"1099","y":"643","p":"x":"1099","y":"667","p":"x":"1094","y":"667","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"97","tip":"poly":"p":"x":"1103","y":"642","p":"x":"1113","y":"642","p":"x":"1112","y":"667","p":"x":"1102","y":"667","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"89","tip":"poly":"p":"x":"1112","y":"640","p":"x":"1122","y":"640","p":"x":"1122","y":"666","p":"x":"1112","y":"666","bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"97","tip":"poly":"p":"x":"1123","y":"640","p":"x":"1132","y":"640","p":"x":"1131","y":"665","p":"x":"1123","y":"665","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"97","tip":"poly":"p":"x":"1134","y":"640","p":"x":"1139","y":"640","p":"x":"1139","y":"664","p":"x":"1133","y":"664","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"77","tip":"poly":"p":"x":"1154","y":"639","p":"x":"1163","y":"639","p":"x":"1163","y":"663","p":"x":"1153","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"97","tip":"poly":"p":"x":"1164","y":"638","p":"x":"1173","y":"638","p":"x":"1173","y":"663","p":"x":"1163","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"94","tip":"poly":"p":"x":"1191","y":"637","p":"x":"1206","y":"636","p":"x":"1205","y":"660","p":"x":"1190","y":"661","bkcolor":"16777215","color":"0","code":"76","code_ascii":"L","confidence":"34","tip":"poly":"p":"x":"1103","y":"
655","p":"x":"1111","y":"655","p":"x":"1111","y":"667","p":"x":"1103","y":"667","bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"57","tip":"poly":"p":"x":"1103","y":"655","p":"x":"1111","y":"655","p":"x":"1111","y":"667","p":"x":"1103","y":"667","bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"57","tip":"poly":"p":"x":"1176","y":"638","p":"x":"1185","y":"637","p":"x":"1184","y":"661","p":"x":"1175","y":"662","bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"7","ncharacter":"8","characters":"characater":"poly":"p":"x":"1094","y":"643","p":"x":"1099","y":"643","p":"x":"1099","y":"667","p":"x":"1094","y":"667","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"97","characater":"poly":"p":"x":"1103","y":"642","p":"x":"1113","y":"642","p":"x":"1112","y":"667","p":"x":"1102","y":"667","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"89","characater":"poly":"p":"x":"1112","y":"640","p":"x":"1122","y":"640","p":"x":"1122","y":"666","p":"x":"1112","y":"666","bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"97","characater":"poly":"p":"x":"1123","y":"640","p":"x":"1132","y":"640","p":"x":"1131","y":"665","p":"x":"1123","y":"665","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"97","characater":"poly":"p":"x":"1134","y":"640","p":"x":"1139","y":"640","p":"x":"1139","y":"664","p":"x":"1133","y":"664","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"77","characater":"poly":"p":"x":"1154","y":"639","p":"x":"1163","y":"639","p":"x":"1163","y":"663","p":"x":"1153","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"97","characater":"poly":"p":"x":"1164","y":"638","p":"x":"1173","y":"638","p":"x":"1173","y":"663","p":"x":"1163","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"94","characater":"poly":"p":"
x":"1191","y":"637","p":"x":"1206","y":"636","p":"x":"1205","y":"660","p":"x":"1190","y":"661","bkcolor":"16777215","color":"0","code":"76","code_ascii":"L","confidence":"34","det_time_us":"1072592","poly":"p":"x":"1088","y":"642","p":"x":"1210","y":"634","p":"x":"1210","y":"661","p":"x":"1087","y":"669","det_time_us":"1720812"


link



input file: https://drive.google.com/file/d/18wCzjMBpw7SIeVFByAGPQiqCBjg_0te3/view?usp=sharing







share|improve this question

















  • 2




    This doesn't seem like valid JSON data to me - jsonlint.com shows it as invalid - SyntaxError: Duplicate key 'p' on line 32.
    – slm♦
    Jul 12 at 23:27










  • @slm yes, I saw this as a warning too, but I can't modify the string. This is what I'm receiving from a third-party module... :(
    – cerr
    Jul 12 at 23:59






  • 1




    @cerr - when I started trying to parse it, it's not putting the letters into an array, so it's tough to parse that.
    – slm♦
    Jul 13 at 0:00














up vote
2
down vote

favorite












I've got to extract a license plate digit and its associated confidence from a JSON string that looks like:






"response":
"container":
"id": "0df307bc-06b2-45cf-b7ff-ce07fd04e04d",
"timestamp": "2018-Jul-10 17:34:27.448632"
,
"id": "00000002-0000-0000-0000-000000000015"
,
"frames":
"frame":
"id": "5583",
"timestamp": "2016-Nov-30 13:05:27",
"lps":
"lp":
"licenseplate": "15451BBL",
"text": "15451BBL",
"wtext": "15451BBL",
"confidence": "20",
"bkcolor": "16777215",
"color": "16777215",
"type": "0",
"ntip": "11",
"cct_country_short": "",
"cct_state_short": "",
"tips":
"tip":
"poly":
"p": "x": "1094", "y": "643" ,
"p": "x": "1099", "y": "643" ,
"p": "x": "1099", "y": "667" ,
"p": "x": "1094", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1103", "y": "642" ,
"p": "x": "1113", "y": "642" ,
"p": "x": "1112", "y": "667" ,
"p": "x": "1102", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "89"
,
"tip":
"poly":
"p": "x": "1112", "y": "640" ,
"p": "x": "1122", "y": "640" ,
"p": "x": "1122", "y": "666" ,
"p": "x": "1112", "y": "666"
,
"bkcolor": "16777215",
"color": "0",
"code": "52",
"code_ascii": "4",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1123", "y": "640" ,
"p": "x": "1132", "y": "640" ,
"p": "x": "1131", "y": "665" ,
"p": "x": "1123", "y": "665"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1134", "y": "640" ,
"p": "x": "1139", "y": "640" ,
"p": "x": "1139", "y": "664" ,
"p": "x": "1133", "y": "664"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "77"
,
"tip":
"poly":
"p": "x": "1154", "y": "639" ,
"p": "x": "1163", "y": "639" ,
"p": "x": "1163", "y": "663" ,
"p": "x": "1153", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1164", "y": "638" ,
"p": "x": "1173", "y": "638" ,
"p": "x": "1173", "y": "663" ,
"p": "x": "1163", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "94"
,
"tip":
"poly":
"p": "x": "1191", "y": "637" ,
"p": "x": "1206", "y": "636" ,
"p": "x": "1205", "y": "660" ,
"p": "x": "1190", "y": "661"
,
"bkcolor": "16777215",
"color": "0",
"code": "76",
"code_ascii": "L",
"confidence": "34"
,
"tip":
"poly":
"p": "x": "1103", "y": "655" ,
"p": "x": "1111", "y": "655" ,
"p": "x": "1111", "y": "667" ,
"p": "x": "1103", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "74",
"code_ascii": "J",
"confidence": "57"
,
"tip":
"poly":
"p": "x": "1103", "y": "655" ,
"p": "x": "1111", "y": "655" ,
"p": "x": "1111", "y": "667" ,
"p": "x": "1103", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "74",
"code_ascii": "J",
"confidence": "57"
,
"tip":
"poly":
"p": "x": "1176", "y": "638" ,
"p": "x": "1185", "y": "637" ,
"p": "x": "1184", "y": "661" ,
"p": "x": "1175", "y": "662"
,
"bkcolor": "16777215",
"color": "0",
"code": "52",
"code_ascii": "4",
"confidence": "7"

,
"ncharacter": "8",
"characters":
"characater":
"poly":
"p": "x": "1094", "y": "643" ,
"p": "x": "1099", "y": "643" ,
"p": "x": "1099", "y": "667" ,
"p": "x": "1094", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1103", "y": "642" ,
"p": "x": "1113", "y": "642" ,
"p": "x": "1112", "y": "667" ,
"p": "x": "1102", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "89"
,
"characater":
"poly":
"p": "x": "1112", "y": "640" ,
"p": "x": "1122", "y": "640" ,
"p": "x": "1122", "y": "666" ,
"p": "x": "1112", "y": "666"
,
"bkcolor": "16777215",
"color": "0",
"code": "52",
"code_ascii": "4",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1123", "y": "640" ,
"p": "x": "1132", "y": "640" ,
"p": "x": "1131", "y": "665" ,
"p": "x": "1123", "y": "665"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1134", "y": "640" ,
"p": "x": "1139", "y": "640" ,
"p": "x": "1139", "y": "664" ,
"p": "x": "1133", "y": "664"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "77"
,
"characater":
"poly":
"p": "x": "1154", "y": "639" ,
"p": "x": "1163", "y": "639" ,
"p": "x": "1163", "y": "663" ,
"p": "x": "1153", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1164", "y": "638" ,
"p": "x": "1173", "y": "638" ,
"p": "x": "1173", "y": "663" ,
"p": "x": "1163", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "94"
,
"characater":
"poly":
"p": "x": "1191", "y": "637" ,
"p": "x": "1206", "y": "636" ,
"p": "x": "1205", "y": "660" ,
"p": "x": "1190", "y": "661"
,
"bkcolor": "16777215",
"color": "0",
"code": "76",
"code_ascii": "L",
"confidence": "34"

,
"det_time_us": "1104009",
"poly":
"p": "x": "1088", "y": "642" ,
"p": "x": "1210", "y": "634" ,
"p": "x": "1210", "y": "661" ,
"p": "x": "1087", "y": "669"


,
"det_time_us": "1710270"







I've got something like:



$ jq -r '.frames.frame.lps.lp|.characters.characater.code_ascii,.characters.characater.confidence' test.json


but it only returns one single letter and its confidence score...



Question



  • How can I get all letters and the associated score back?

Output



I expect the output to be like:



1 97, 5 89, 4 97, 5 97, 1 77, B 97, B 94, L 34, J 57, J 57, 4 7, 1 97, 5 89, 4 97, 5 97, 1 77, B 97, B 94, L 34


NOTE: format can be different, this is just indicating the data I wanted to extract.



copy & paste



input file "test.json"



"response":"container":"id":"41d6efcb-24d6-490d-8880-762255519b5f","timestamp":"2018-Jul-11 19:51:06.461665","id":"00000002-0000-0000-0000-000000000015","frames":"frame":"id":"5583","timestamp":"2016-Nov-30 13:05:27","lps":"lp":"licenseplate":"15451BBL","text":"15451BBL","wtext":"15451BBL","confidence":"20","bkcolor":"16777215","color":"16777215","type":"0","ntip":"11","cct_country_short":"","cct_state_short":"","tips":"tip":"poly":"p":"x":"1094","y":"643","p":"x":"1099","y":"643","p":"x":"1099","y":"667","p":"x":"1094","y":"667","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"97","tip":"poly":"p":"x":"1103","y":"642","p":"x":"1113","y":"642","p":"x":"1112","y":"667","p":"x":"1102","y":"667","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"89","tip":"poly":"p":"x":"1112","y":"640","p":"x":"1122","y":"640","p":"x":"1122","y":"666","p":"x":"1112","y":"666","bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"97","tip":"poly":"p":"x":"1123","y":"640","p":"x":"1132","y":"640","p":"x":"1131","y":"665","p":"x":"1123","y":"665","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"97","tip":"poly":"p":"x":"1134","y":"640","p":"x":"1139","y":"640","p":"x":"1139","y":"664","p":"x":"1133","y":"664","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"77","tip":"poly":"p":"x":"1154","y":"639","p":"x":"1163","y":"639","p":"x":"1163","y":"663","p":"x":"1153","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"97","tip":"poly":"p":"x":"1164","y":"638","p":"x":"1173","y":"638","p":"x":"1173","y":"663","p":"x":"1163","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"94","tip":"poly":"p":"x":"1191","y":"637","p":"x":"1206","y":"636","p":"x":"1205","y":"660","p":"x":"1190","y":"661","bkcolor":"16777215","color":"0","code":"76","code_ascii":"L","confidence":"34","tip":"poly":"p":"x":"1103","y":"
655","p":"x":"1111","y":"655","p":"x":"1111","y":"667","p":"x":"1103","y":"667","bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"57","tip":"poly":"p":"x":"1103","y":"655","p":"x":"1111","y":"655","p":"x":"1111","y":"667","p":"x":"1103","y":"667","bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"57","tip":"poly":"p":"x":"1176","y":"638","p":"x":"1185","y":"637","p":"x":"1184","y":"661","p":"x":"1175","y":"662","bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"7","ncharacter":"8","characters":"characater":"poly":"p":"x":"1094","y":"643","p":"x":"1099","y":"643","p":"x":"1099","y":"667","p":"x":"1094","y":"667","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"97","characater":"poly":"p":"x":"1103","y":"642","p":"x":"1113","y":"642","p":"x":"1112","y":"667","p":"x":"1102","y":"667","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"89","characater":"poly":"p":"x":"1112","y":"640","p":"x":"1122","y":"640","p":"x":"1122","y":"666","p":"x":"1112","y":"666","bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"97","characater":"poly":"p":"x":"1123","y":"640","p":"x":"1132","y":"640","p":"x":"1131","y":"665","p":"x":"1123","y":"665","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"97","characater":"poly":"p":"x":"1134","y":"640","p":"x":"1139","y":"640","p":"x":"1139","y":"664","p":"x":"1133","y":"664","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"77","characater":"poly":"p":"x":"1154","y":"639","p":"x":"1163","y":"639","p":"x":"1163","y":"663","p":"x":"1153","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"97","characater":"poly":"p":"x":"1164","y":"638","p":"x":"1173","y":"638","p":"x":"1173","y":"663","p":"x":"1163","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"94","characater":"poly":"p":"
x":"1191","y":"637","p":"x":"1206","y":"636","p":"x":"1205","y":"660","p":"x":"1190","y":"661","bkcolor":"16777215","color":"0","code":"76","code_ascii":"L","confidence":"34","det_time_us":"1072592","poly":"p":"x":"1088","y":"642","p":"x":"1210","y":"634","p":"x":"1210","y":"661","p":"x":"1087","y":"669","det_time_us":"1720812"


link



input file: https://drive.google.com/file/d/18wCzjMBpw7SIeVFByAGPQiqCBjg_0te3/view?usp=sharing







share|improve this question

















  • 2




    This doesn't seem like valid JSON data to me - jsonlint.com shows it as invalid - SyntaxError: Duplicate key 'p' on line 32.
    – slm♦
    Jul 12 at 23:27










  • @slm yes, I saw this as a warning too, but I can't modify the string. This is what I'm receiving from a third-party module... :(
    – cerr
    Jul 12 at 23:59






  • 1




    @cerr - when I started trying to parse it, it's not putting the letters into an array, so it's tough to parse that.
    – slm♦
    Jul 13 at 0:00












up vote
2
down vote

favorite









up vote
2
down vote

favorite











I've got to extract a license plate digit and its associated confidence from a JSON string that looks like:






"response":
"container":
"id": "0df307bc-06b2-45cf-b7ff-ce07fd04e04d",
"timestamp": "2018-Jul-10 17:34:27.448632"
,
"id": "00000002-0000-0000-0000-000000000015"
,
"frames":
"frame":
"id": "5583",
"timestamp": "2016-Nov-30 13:05:27",
"lps":
"lp":
"licenseplate": "15451BBL",
"text": "15451BBL",
"wtext": "15451BBL",
"confidence": "20",
"bkcolor": "16777215",
"color": "16777215",
"type": "0",
"ntip": "11",
"cct_country_short": "",
"cct_state_short": "",
"tips":
"tip":
"poly":
"p": "x": "1094", "y": "643" ,
"p": "x": "1099", "y": "643" ,
"p": "x": "1099", "y": "667" ,
"p": "x": "1094", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1103", "y": "642" ,
"p": "x": "1113", "y": "642" ,
"p": "x": "1112", "y": "667" ,
"p": "x": "1102", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "89"
,
"tip":
"poly":
"p": "x": "1112", "y": "640" ,
"p": "x": "1122", "y": "640" ,
"p": "x": "1122", "y": "666" ,
"p": "x": "1112", "y": "666"
,
"bkcolor": "16777215",
"color": "0",
"code": "52",
"code_ascii": "4",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1123", "y": "640" ,
"p": "x": "1132", "y": "640" ,
"p": "x": "1131", "y": "665" ,
"p": "x": "1123", "y": "665"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1134", "y": "640" ,
"p": "x": "1139", "y": "640" ,
"p": "x": "1139", "y": "664" ,
"p": "x": "1133", "y": "664"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "77"
,
"tip":
"poly":
"p": "x": "1154", "y": "639" ,
"p": "x": "1163", "y": "639" ,
"p": "x": "1163", "y": "663" ,
"p": "x": "1153", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1164", "y": "638" ,
"p": "x": "1173", "y": "638" ,
"p": "x": "1173", "y": "663" ,
"p": "x": "1163", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "94"
,
"tip":
"poly":
"p": "x": "1191", "y": "637" ,
"p": "x": "1206", "y": "636" ,
"p": "x": "1205", "y": "660" ,
"p": "x": "1190", "y": "661"
,
"bkcolor": "16777215",
"color": "0",
"code": "76",
"code_ascii": "L",
"confidence": "34"
,
"tip":
"poly":
"p": "x": "1103", "y": "655" ,
"p": "x": "1111", "y": "655" ,
"p": "x": "1111", "y": "667" ,
"p": "x": "1103", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "74",
"code_ascii": "J",
"confidence": "57"
,
"tip":
"poly":
"p": "x": "1103", "y": "655" ,
"p": "x": "1111", "y": "655" ,
"p": "x": "1111", "y": "667" ,
"p": "x": "1103", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "74",
"code_ascii": "J",
"confidence": "57"
,
"tip":
"poly":
"p": "x": "1176", "y": "638" ,
"p": "x": "1185", "y": "637" ,
"p": "x": "1184", "y": "661" ,
"p": "x": "1175", "y": "662"
,
"bkcolor": "16777215",
"color": "0",
"code": "52",
"code_ascii": "4",
"confidence": "7"

,
"ncharacter": "8",
"characters":
"characater":
"poly":
"p": "x": "1094", "y": "643" ,
"p": "x": "1099", "y": "643" ,
"p": "x": "1099", "y": "667" ,
"p": "x": "1094", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1103", "y": "642" ,
"p": "x": "1113", "y": "642" ,
"p": "x": "1112", "y": "667" ,
"p": "x": "1102", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "89"
,
"characater":
"poly":
"p": "x": "1112", "y": "640" ,
"p": "x": "1122", "y": "640" ,
"p": "x": "1122", "y": "666" ,
"p": "x": "1112", "y": "666"
,
"bkcolor": "16777215",
"color": "0",
"code": "52",
"code_ascii": "4",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1123", "y": "640" ,
"p": "x": "1132", "y": "640" ,
"p": "x": "1131", "y": "665" ,
"p": "x": "1123", "y": "665"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1134", "y": "640" ,
"p": "x": "1139", "y": "640" ,
"p": "x": "1139", "y": "664" ,
"p": "x": "1133", "y": "664"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "77"
,
"characater":
"poly":
"p": "x": "1154", "y": "639" ,
"p": "x": "1163", "y": "639" ,
"p": "x": "1163", "y": "663" ,
"p": "x": "1153", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1164", "y": "638" ,
"p": "x": "1173", "y": "638" ,
"p": "x": "1173", "y": "663" ,
"p": "x": "1163", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "94"
,
"characater":
"poly":
"p": "x": "1191", "y": "637" ,
"p": "x": "1206", "y": "636" ,
"p": "x": "1205", "y": "660" ,
"p": "x": "1190", "y": "661"
,
"bkcolor": "16777215",
"color": "0",
"code": "76",
"code_ascii": "L",
"confidence": "34"

,
"det_time_us": "1104009",
"poly":
"p": "x": "1088", "y": "642" ,
"p": "x": "1210", "y": "634" ,
"p": "x": "1210", "y": "661" ,
"p": "x": "1087", "y": "669"


,
"det_time_us": "1710270"







I've got something like:



$ jq -r '.frames.frame.lps.lp|.characters.characater.code_ascii,.characters.characater.confidence' test.json


but it only returns one single letter and its confidence score...



Question



  • How can I get all letters and the associated score back?

Output



I expect the output to be like:



1 97, 5 89, 4 97, 5 97, 1 77, B 97, B 94, L 34, J 57, J 57, 4 7, 1 97, 5 89, 4 97, 5 97, 1 77, B 97, B 94, L 34


NOTE: format can be different, this is just indicating the data I wanted to extract.



copy & paste



input file "test.json"



"response":"container":"id":"41d6efcb-24d6-490d-8880-762255519b5f","timestamp":"2018-Jul-11 19:51:06.461665","id":"00000002-0000-0000-0000-000000000015","frames":"frame":"id":"5583","timestamp":"2016-Nov-30 13:05:27","lps":"lp":"licenseplate":"15451BBL","text":"15451BBL","wtext":"15451BBL","confidence":"20","bkcolor":"16777215","color":"16777215","type":"0","ntip":"11","cct_country_short":"","cct_state_short":"","tips":"tip":"poly":"p":"x":"1094","y":"643","p":"x":"1099","y":"643","p":"x":"1099","y":"667","p":"x":"1094","y":"667","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"97","tip":"poly":"p":"x":"1103","y":"642","p":"x":"1113","y":"642","p":"x":"1112","y":"667","p":"x":"1102","y":"667","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"89","tip":"poly":"p":"x":"1112","y":"640","p":"x":"1122","y":"640","p":"x":"1122","y":"666","p":"x":"1112","y":"666","bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"97","tip":"poly":"p":"x":"1123","y":"640","p":"x":"1132","y":"640","p":"x":"1131","y":"665","p":"x":"1123","y":"665","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"97","tip":"poly":"p":"x":"1134","y":"640","p":"x":"1139","y":"640","p":"x":"1139","y":"664","p":"x":"1133","y":"664","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"77","tip":"poly":"p":"x":"1154","y":"639","p":"x":"1163","y":"639","p":"x":"1163","y":"663","p":"x":"1153","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"97","tip":"poly":"p":"x":"1164","y":"638","p":"x":"1173","y":"638","p":"x":"1173","y":"663","p":"x":"1163","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"94","tip":"poly":"p":"x":"1191","y":"637","p":"x":"1206","y":"636","p":"x":"1205","y":"660","p":"x":"1190","y":"661","bkcolor":"16777215","color":"0","code":"76","code_ascii":"L","confidence":"34","tip":"poly":"p":"x":"1103","y":"
655","p":"x":"1111","y":"655","p":"x":"1111","y":"667","p":"x":"1103","y":"667","bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"57","tip":"poly":"p":"x":"1103","y":"655","p":"x":"1111","y":"655","p":"x":"1111","y":"667","p":"x":"1103","y":"667","bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"57","tip":"poly":"p":"x":"1176","y":"638","p":"x":"1185","y":"637","p":"x":"1184","y":"661","p":"x":"1175","y":"662","bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"7","ncharacter":"8","characters":"characater":"poly":"p":"x":"1094","y":"643","p":"x":"1099","y":"643","p":"x":"1099","y":"667","p":"x":"1094","y":"667","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"97","characater":"poly":"p":"x":"1103","y":"642","p":"x":"1113","y":"642","p":"x":"1112","y":"667","p":"x":"1102","y":"667","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"89","characater":"poly":"p":"x":"1112","y":"640","p":"x":"1122","y":"640","p":"x":"1122","y":"666","p":"x":"1112","y":"666","bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"97","characater":"poly":"p":"x":"1123","y":"640","p":"x":"1132","y":"640","p":"x":"1131","y":"665","p":"x":"1123","y":"665","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"97","characater":"poly":"p":"x":"1134","y":"640","p":"x":"1139","y":"640","p":"x":"1139","y":"664","p":"x":"1133","y":"664","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"77","characater":"poly":"p":"x":"1154","y":"639","p":"x":"1163","y":"639","p":"x":"1163","y":"663","p":"x":"1153","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"97","characater":"poly":"p":"x":"1164","y":"638","p":"x":"1173","y":"638","p":"x":"1173","y":"663","p":"x":"1163","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"94","characater":"poly":"p":"
x":"1191","y":"637","p":"x":"1206","y":"636","p":"x":"1205","y":"660","p":"x":"1190","y":"661","bkcolor":"16777215","color":"0","code":"76","code_ascii":"L","confidence":"34","det_time_us":"1072592","poly":"p":"x":"1088","y":"642","p":"x":"1210","y":"634","p":"x":"1210","y":"661","p":"x":"1087","y":"669","det_time_us":"1720812"


link



input file: https://drive.google.com/file/d/18wCzjMBpw7SIeVFByAGPQiqCBjg_0te3/view?usp=sharing







share|improve this question













I've got to extract a license plate digit and its associated confidence from a JSON string that looks like:






"response":
"container":
"id": "0df307bc-06b2-45cf-b7ff-ce07fd04e04d",
"timestamp": "2018-Jul-10 17:34:27.448632"
,
"id": "00000002-0000-0000-0000-000000000015"
,
"frames":
"frame":
"id": "5583",
"timestamp": "2016-Nov-30 13:05:27",
"lps":
"lp":
"licenseplate": "15451BBL",
"text": "15451BBL",
"wtext": "15451BBL",
"confidence": "20",
"bkcolor": "16777215",
"color": "16777215",
"type": "0",
"ntip": "11",
"cct_country_short": "",
"cct_state_short": "",
"tips":
"tip":
"poly":
"p": "x": "1094", "y": "643" ,
"p": "x": "1099", "y": "643" ,
"p": "x": "1099", "y": "667" ,
"p": "x": "1094", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1103", "y": "642" ,
"p": "x": "1113", "y": "642" ,
"p": "x": "1112", "y": "667" ,
"p": "x": "1102", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "89"
,
"tip":
"poly":
"p": "x": "1112", "y": "640" ,
"p": "x": "1122", "y": "640" ,
"p": "x": "1122", "y": "666" ,
"p": "x": "1112", "y": "666"
,
"bkcolor": "16777215",
"color": "0",
"code": "52",
"code_ascii": "4",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1123", "y": "640" ,
"p": "x": "1132", "y": "640" ,
"p": "x": "1131", "y": "665" ,
"p": "x": "1123", "y": "665"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1134", "y": "640" ,
"p": "x": "1139", "y": "640" ,
"p": "x": "1139", "y": "664" ,
"p": "x": "1133", "y": "664"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "77"
,
"tip":
"poly":
"p": "x": "1154", "y": "639" ,
"p": "x": "1163", "y": "639" ,
"p": "x": "1163", "y": "663" ,
"p": "x": "1153", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "97"
,
"tip":
"poly":
"p": "x": "1164", "y": "638" ,
"p": "x": "1173", "y": "638" ,
"p": "x": "1173", "y": "663" ,
"p": "x": "1163", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "94"
,
"tip":
"poly":
"p": "x": "1191", "y": "637" ,
"p": "x": "1206", "y": "636" ,
"p": "x": "1205", "y": "660" ,
"p": "x": "1190", "y": "661"
,
"bkcolor": "16777215",
"color": "0",
"code": "76",
"code_ascii": "L",
"confidence": "34"
,
"tip":
"poly":
"p": "x": "1103", "y": "655" ,
"p": "x": "1111", "y": "655" ,
"p": "x": "1111", "y": "667" ,
"p": "x": "1103", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "74",
"code_ascii": "J",
"confidence": "57"
,
"tip":
"poly":
"p": "x": "1103", "y": "655" ,
"p": "x": "1111", "y": "655" ,
"p": "x": "1111", "y": "667" ,
"p": "x": "1103", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "74",
"code_ascii": "J",
"confidence": "57"
,
"tip":
"poly":
"p": "x": "1176", "y": "638" ,
"p": "x": "1185", "y": "637" ,
"p": "x": "1184", "y": "661" ,
"p": "x": "1175", "y": "662"
,
"bkcolor": "16777215",
"color": "0",
"code": "52",
"code_ascii": "4",
"confidence": "7"

,
"ncharacter": "8",
"characters":
"characater":
"poly":
"p": "x": "1094", "y": "643" ,
"p": "x": "1099", "y": "643" ,
"p": "x": "1099", "y": "667" ,
"p": "x": "1094", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1103", "y": "642" ,
"p": "x": "1113", "y": "642" ,
"p": "x": "1112", "y": "667" ,
"p": "x": "1102", "y": "667"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "89"
,
"characater":
"poly":
"p": "x": "1112", "y": "640" ,
"p": "x": "1122", "y": "640" ,
"p": "x": "1122", "y": "666" ,
"p": "x": "1112", "y": "666"
,
"bkcolor": "16777215",
"color": "0",
"code": "52",
"code_ascii": "4",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1123", "y": "640" ,
"p": "x": "1132", "y": "640" ,
"p": "x": "1131", "y": "665" ,
"p": "x": "1123", "y": "665"
,
"bkcolor": "16777215",
"color": "0",
"code": "53",
"code_ascii": "5",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1134", "y": "640" ,
"p": "x": "1139", "y": "640" ,
"p": "x": "1139", "y": "664" ,
"p": "x": "1133", "y": "664"
,
"bkcolor": "16777215",
"color": "0",
"code": "49",
"code_ascii": "1",
"confidence": "77"
,
"characater":
"poly":
"p": "x": "1154", "y": "639" ,
"p": "x": "1163", "y": "639" ,
"p": "x": "1163", "y": "663" ,
"p": "x": "1153", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "97"
,
"characater":
"poly":
"p": "x": "1164", "y": "638" ,
"p": "x": "1173", "y": "638" ,
"p": "x": "1173", "y": "663" ,
"p": "x": "1163", "y": "663"
,
"bkcolor": "16777215",
"color": "0",
"code": "66",
"code_ascii": "B",
"confidence": "94"
,
"characater":
"poly":
"p": "x": "1191", "y": "637" ,
"p": "x": "1206", "y": "636" ,
"p": "x": "1205", "y": "660" ,
"p": "x": "1190", "y": "661"
,
"bkcolor": "16777215",
"color": "0",
"code": "76",
"code_ascii": "L",
"confidence": "34"

,
"det_time_us": "1104009",
"poly":
"p": "x": "1088", "y": "642" ,
"p": "x": "1210", "y": "634" ,
"p": "x": "1210", "y": "661" ,
"p": "x": "1087", "y": "669"


,
"det_time_us": "1710270"







I've got something like:



$ jq -r '.frames.frame.lps.lp|.characters.characater.code_ascii,.characters.characater.confidence' test.json


but it only returns a single letter and its confidence score.



Question



  • How can I get all letters and the associated score back?

Output



I expect the output to be like:



1 97, 5 89, 4 97, 5 97, 1 77, B 97, B 94, L 34, J 57, J 57, 4 7, 1 97, 5 89, 4 97, 5 97, 1 77, B 97, B 94, L 34


NOTE: format can be different, this is just indicating the data I wanted to extract.



copy & paste



input file "test.json"



"response":"container":"id":"41d6efcb-24d6-490d-8880-762255519b5f","timestamp":"2018-Jul-11 19:51:06.461665","id":"00000002-0000-0000-0000-000000000015","frames":"frame":"id":"5583","timestamp":"2016-Nov-30 13:05:27","lps":"lp":"licenseplate":"15451BBL","text":"15451BBL","wtext":"15451BBL","confidence":"20","bkcolor":"16777215","color":"16777215","type":"0","ntip":"11","cct_country_short":"","cct_state_short":"","tips":"tip":"poly":"p":"x":"1094","y":"643","p":"x":"1099","y":"643","p":"x":"1099","y":"667","p":"x":"1094","y":"667","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"97","tip":"poly":"p":"x":"1103","y":"642","p":"x":"1113","y":"642","p":"x":"1112","y":"667","p":"x":"1102","y":"667","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"89","tip":"poly":"p":"x":"1112","y":"640","p":"x":"1122","y":"640","p":"x":"1122","y":"666","p":"x":"1112","y":"666","bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"97","tip":"poly":"p":"x":"1123","y":"640","p":"x":"1132","y":"640","p":"x":"1131","y":"665","p":"x":"1123","y":"665","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"97","tip":"poly":"p":"x":"1134","y":"640","p":"x":"1139","y":"640","p":"x":"1139","y":"664","p":"x":"1133","y":"664","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"77","tip":"poly":"p":"x":"1154","y":"639","p":"x":"1163","y":"639","p":"x":"1163","y":"663","p":"x":"1153","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"97","tip":"poly":"p":"x":"1164","y":"638","p":"x":"1173","y":"638","p":"x":"1173","y":"663","p":"x":"1163","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"94","tip":"poly":"p":"x":"1191","y":"637","p":"x":"1206","y":"636","p":"x":"1205","y":"660","p":"x":"1190","y":"661","bkcolor":"16777215","color":"0","code":"76","code_ascii":"L","confidence":"34","tip":"poly":"p":"x":"1103","y":"
655","p":"x":"1111","y":"655","p":"x":"1111","y":"667","p":"x":"1103","y":"667","bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"57","tip":"poly":"p":"x":"1103","y":"655","p":"x":"1111","y":"655","p":"x":"1111","y":"667","p":"x":"1103","y":"667","bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"57","tip":"poly":"p":"x":"1176","y":"638","p":"x":"1185","y":"637","p":"x":"1184","y":"661","p":"x":"1175","y":"662","bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"7","ncharacter":"8","characters":"characater":"poly":"p":"x":"1094","y":"643","p":"x":"1099","y":"643","p":"x":"1099","y":"667","p":"x":"1094","y":"667","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"97","characater":"poly":"p":"x":"1103","y":"642","p":"x":"1113","y":"642","p":"x":"1112","y":"667","p":"x":"1102","y":"667","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"89","characater":"poly":"p":"x":"1112","y":"640","p":"x":"1122","y":"640","p":"x":"1122","y":"666","p":"x":"1112","y":"666","bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"97","characater":"poly":"p":"x":"1123","y":"640","p":"x":"1132","y":"640","p":"x":"1131","y":"665","p":"x":"1123","y":"665","bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"97","characater":"poly":"p":"x":"1134","y":"640","p":"x":"1139","y":"640","p":"x":"1139","y":"664","p":"x":"1133","y":"664","bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"77","characater":"poly":"p":"x":"1154","y":"639","p":"x":"1163","y":"639","p":"x":"1163","y":"663","p":"x":"1153","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"97","characater":"poly":"p":"x":"1164","y":"638","p":"x":"1173","y":"638","p":"x":"1173","y":"663","p":"x":"1163","y":"663","bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"94","characater":"poly":"p":"
x":"1191","y":"637","p":"x":"1206","y":"636","p":"x":"1205","y":"660","p":"x":"1190","y":"661","bkcolor":"16777215","color":"0","code":"76","code_ascii":"L","confidence":"34","det_time_us":"1072592","poly":"p":"x":"1088","y":"642","p":"x":"1210","y":"634","p":"x":"1210","y":"661","p":"x":"1087","y":"669","det_time_us":"1720812"


link



input file: https://drive.google.com/file/d/18wCzjMBpw7SIeVFByAGPQiqCBjg_0te3/view?usp=sharing









share|improve this question












share|improve this question




share|improve this question








edited Jul 13 at 14:38
























asked Jul 12 at 22:33









cerr

61772235




61772235







  • 2




    This doesn't seem like valid JSON data to me - jsonlint.com shows it as invalid - SyntaxError: Duplicate key 'p' on line 32.
    – slm♦
    Jul 12 at 23:27










  • @slm yes, i saw this as a warning too but I can't modify the string. This is what I'm receiving from a third party module... :(
    – cerr
    Jul 12 at 23:59






  • 1




    @cerr - when I started trying to parse it it's not putting the letters into an array, so it's tough to parse that.
    – slm♦
    Jul 13 at 0:00












  • 2




    This doesn't seem like valid JSON data to me - jsonlint.com shows it as invalid - SyntaxError: Duplicate key 'p' on line 32.
    – slm♦
    Jul 12 at 23:27










  • @slm yes, i saw this as a warning too but I can't modify the string. This is what I'm receiving from a third party module... :(
    – cerr
    Jul 12 at 23:59






  • 1




    @cerr - when I started trying to parse it it's not putting the letters into an array, so it's tough to parse that.
    – slm♦
    Jul 13 at 0:00







2




2




This doesn't seem like valid JSON data to me - jsonlint.com shows it as invalid - SyntaxError: Duplicate key 'p' on line 32.
– slm♦
Jul 12 at 23:27




This doesn't seem like valid JSON data to me - jsonlint.com shows it as invalid - SyntaxError: Duplicate key 'p' on line 32.
– slm♦
Jul 12 at 23:27












@slm yes, i saw this as a warning too but I can't modify the string. This is what I'm receiving from a third party module... :(
– cerr
Jul 12 at 23:59




@slm yes, i saw this as a warning too but I can't modify the string. This is what I'm receiving from a third party module... :(
– cerr
Jul 12 at 23:59




1




1




@cerr - when I started trying to parse it it's not putting the letters into an array, so it's tough to parse that.
– slm♦
Jul 13 at 0:00




@cerr - when I started trying to parse it it's not putting the letters into an array, so it's tough to parse that.
– slm♦
Jul 13 at 0:00










3 Answers
3






active

oldest

votes

















up vote
2
down vote



accepted










Since your characters array is not a proper JSON array (it's a hash where each key has the same name), only the last value of it will be available.



We can fix that by preprocessing the data to create unique keys for each object that is now called characater:



awk '/"characater"/ { sub("\"characater\"", "\"char" ++n "\"", $0) } 1' file.json


This would replace each characater with char1, char2 etc.



We can now access all values in that with e.g.



jq -r '.frames.frame.lps.lp|.characters[]|[.code_ascii,.confidence]|@tsv'


The complete pipeline:



awk '/"characater"/ { sub("\"characater\"", "\"char" ++n "\"", $0) } 1' file.json |
jq -r '.frames.frame.lps.lp|.characters[]|[.code_ascii,.confidence]|@tsv'


Result (given the data in the question):



1 97
5 89
4 97
5 97
1 77
B 97
B 94
L 34


If you control the generation of the JSON document, you should consider turning the characters object into a proper array.






share|improve this answer





















  • This only prints two lines: 1 97 & L 34 i.e. the characater to charN only worked for one replacement
    – cerr
    Jul 13 at 14:23







  • 1




    @cerr Odd, I am using the data that you provided without modifications... Did you copy and paste my awk code correctly?
    – Kusalananda
    Jul 13 at 14:31











  • Try: drive.google.com/file/d/18wCzjMBpw7SIeVFByAGPQiqCBjg_0te3/…
    – cerr
    Jul 13 at 14:35







  • 1




    @cerr Ah! It's all on one line. In that case, change sub() to gsub(). No, sorry scrap that. n would not increment. Hold on, I need to doodle.
    – Kusalananda
    Jul 13 at 14:36







  • 2




    @cerr Yes, you can: with GNU sed, put a newline after each "}," with sed 's/},/},\n/g' and then pass the result through the awk and jq commands.
    – Kusalananda
    Jul 13 at 15:05

















up vote
1
down vote













Given your input isn't valid JSON you'll likely have to go with a sed, awk, grep type of solution. To that end the following can deal with input where it's been 'minified' into a single string:



$ grep -oP '"code_ascii":"\w+","confidence":"\w+"' <FILE> | grep -oP '(?<=:)"\w+"' | paste - -


Example



$ grep -oP '"code_ascii":"\w+","confidence":"\w+"' b.json | grep -oP '(?<=:)"\w+"' | paste - -
"1", "97"
"5", "89"
"4", "97"
"5", "97"
"1", "77"
"B", "97"
"B", "94"
"L", "34"
"J", "57"
"J", "57"
"4", "7"
"1", "97"
"5", "89"
"4", "97"
"5", "97"
"1", "77"
"B", "97"
"B", "94"
"L", "34"


Alternatives



NOTE: These work with multi-line input data.



sed+paste

$ sed -n '/code_ascii/,/confidence/p' a.json | sed 's/.*: [^"]*//' | paste - -


sed,awk,paste

$ sed -n '/code_ascii/,/confidence/p' a.json | awk -F': ' '{print $2}' | paste -d" " - -


sed,grep,paste

$ sed -n '/code_ascii/,/confidence/p' a.json | grep -oP '(?<=: ).*$' | paste - -


grep+paste

$ grep -oP '"code_ascii":"\w+","confidence":"\w+"' b.json | grep -oP '(?<=:)"\w+"' | paste - -





share|improve this answer























  • Get it here: txt.do/dzp91
    – cerr
    Jul 13 at 4:53










  • @cerr - cool, I'll make another alt
    – slm♦
    Jul 13 at 5:09










  • @cerr - try the grep+paste one
    – slm♦
    Jul 13 at 5:24

















up vote
1
down vote













grep + sed + tr :



grep -e code_ascii -e confidence <file> | sed 's/.*: "\(.*\)"/\1/;' | tr "\n" ' '


Return :



20, 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 J, 57 J, 57 4, 7 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 


This command returns only the code_ascii + confidence pairs:



 grep -e code_ascii -e confidence <file> | sed -n 's/.*code_ascii": "\(.*\)"/\1/;T;h;n;s/.*"confidence": "\(.*\)"/\1/;T;H;g;p' | tr "\n" " " 


Return :



 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 J, 57 J, 57 4, 7 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 


Now, see Kusalananda's post above. With just sed + tr:



sed -i 's/},/},\n/g' <tempfile>
sed -n 's/characater//;T;:z;n;s/.*code_ascii": "\(.*\)"/\1/;Tz;:zz;h;n;s/.*"confidence": "\(.*\)"/\1/;Tzz;H;s/$//;Tz;g;p' <tempfile> | tr "\n" " "


Return :



1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 





share|improve this answer























  • Hi alux, no, for me this prints the whole JSON string. (PS: I had my filename inserted at the right spot - it's not working for me)
    – cerr
    Jul 13 at 4:48











  • bizare your file code, on my debian (904), just sed -n '/code_ascii/,/confidence/p' tmpfile block my consol. crtl-c oblige. Good luck
    – alux
    Jul 13 at 5:03










  • sed -n '/code_ascii/,/confidence/p' tmpfile will print the whole line
    – cerr
    Jul 13 at 5:05










  • As written above grep -e code_ascii -e confidence tempfile | sed 's/.*: "(.*)"/1/;' | tr "n" ' works but not a simple sed -n '/code_ascii/,/confidence/p' tmpfile ???
    – alux
    Jul 13 at 5:13










  • He doesn't want the first 20 you're showing in your output.
    – slm♦
    Jul 13 at 5:40










Your Answer







StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "106"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: false,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);



);








 

draft saved


draft discarded


















StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2funix.stackexchange.com%2fquestions%2f454998%2fextract-data-from-json-string%23new-answer', 'question_page');

);

Post as a guest






























3 Answers
3






active

oldest

votes








3 Answers
3






active

oldest

votes









active

oldest

votes






active

oldest

votes








up vote
2
down vote



accepted










Since your characters array is not a proper JSON array (it's a hash where each key has the same name), only the last value of it will be available.



We can fix that by preprocessing the data to create unique keys for each object that is now called characater:



awk '/"characater"/ { sub("\"characater\"", "\"char" ++n "\"", $0) } 1' file.json


This would replace each characater with char1, char2 etc.



We can now access all values in that with e.g.



jq -r '.frames.frame.lps.lp|.characters[]|[.code_ascii,.confidence]|@tsv'


The complete pipeline:



awk '/"characater"/ { sub("\"characater\"", "\"char" ++n "\"", $0) } 1' file.json |
jq -r '.frames.frame.lps.lp|.characters[]|[.code_ascii,.confidence]|@tsv'


Result (given the data in the question):



1 97
5 89
4 97
5 97
1 77
B 97
B 94
L 34


If you control the generation of the JSON document, you should consider turning the characters object into a proper array.






share|improve this answer





















  • This only prints two lines: 1 97 & L 34 i.e. the characater to charN only worked for one replacement
    – cerr
    Jul 13 at 14:23







  • 1




    @cerr Odd, I am using the data that you provided without modifications... Did you copy and paste my awk code correctly?
    – Kusalananda
    Jul 13 at 14:31











  • Try: drive.google.com/file/d/18wCzjMBpw7SIeVFByAGPQiqCBjg_0te3/…
    – cerr
    Jul 13 at 14:35







  • 1




    @cerr Ah! It's all on one line. In that case, change sub() to gsub(). No, sorry scrap that. n would not increment. Hold on, I need to doodle.
    – Kusalananda
    Jul 13 at 14:36







  • 2




    @cerr Yes, you can: with GNU sed, put a newline after each "}," with sed 's/},/},\n/g' and then pass the result through the awk and jq commands.
    – Kusalananda
    Jul 13 at 15:05














up vote
2
down vote



accepted










Since your characters array is not a proper JSON array (it's a hash where each key has the same name), only the last value of it will be available.



We can fix that by preprocessing the data to create unique keys for each object that is now called characater:



awk '/"characater"/ sub(""characater"", ""char" ++n """, $0) 1' file.json


This would replace each characater with char1, char2 etc.



We can now access all values in that with e.g.



jq -r '.frames.frame.lps.lp|.characters|[.code_ascii,.confidence]|@tsv'


The complete pipeline:



awk '/"characater"/ sub(""characater"", ""char" ++n """, $0) 1' file.json |
jq -r '.frames.frame.lps.lp|.characters|[.code_ascii,.confidence]|@tsv'


Result (given the data in the question):



1 97
5 89
4 97
5 97
1 77
B 97
B 94
L 34


If you control the generation of the JSON document, you should consider turning the characters object into a proper array.






share|improve this answer





















  • This only prints two lines: 1 97 & L 34 i.e. the characater to charN only worked for one replacement
    – cerr
    Jul 13 at 14:23







  • 1




    @cerr Odd, I am using the data that you provided without modifications... Did you copy and past my awk code correctly?
    – Kusalananda
    Jul 13 at 14:31











  • Try: drive.google.com/file/d/18wCzjMBpw7SIeVFByAGPQiqCBjg_0te3/…
    – cerr
    Jul 13 at 14:35







  • 1




    @cerr Ah! It's all on one line. In that case, change sub() to gsub(). No, sorry scrap that. n would not increment. Hold on, I need to doodle.
    – Kusalananda
    Jul 13 at 14:36







  • 2




    @cerr Yes, you can, with GNU sed put a newline after each }, with sed 's/},/},n/g' and then pass it through the awk and jq.
    – Kusalananda
    Jul 13 at 15:05












up vote
2
down vote



accepted







up vote
2
down vote



accepted






Since your characters array is not a proper JSON array (it's a hash where each key has the same name), only the last value of it will be available.



We can fix that by preprocessing the data to create unique keys for each object that is now called characater:



awk '/"characater"/ sub(""characater"", ""char" ++n """, $0) 1' file.json


This would replace each characater with char1, char2 etc.



We can now access all values in that with e.g.



jq -r '.frames.frame.lps.lp|.characters|[.code_ascii,.confidence]|@tsv'


The complete pipeline:



awk '/"characater"/ sub(""characater"", ""char" ++n """, $0) 1' file.json |
jq -r '.frames.frame.lps.lp|.characters|[.code_ascii,.confidence]|@tsv'


Result (given the data in the question):



1 97
5 89
4 97
5 97
1 77
B 97
B 94
L 34


If you control the generation of the JSON document, you should consider turning the characters object into a proper array.






share|improve this answer













Since your characters array is not a proper JSON array (it's a hash where each key has the same name), only the last value of it will be available.



We can fix that by preprocessing the data to create unique keys for each object that is now called characater:



awk '/"characater"/ sub(""characater"", ""char" ++n """, $0) 1' file.json


This would replace each characater with char1, char2 etc.



We can now access all values in that with e.g.



jq -r '.frames.frame.lps.lp|.characters|[.code_ascii,.confidence]|@tsv'


The complete pipeline:



awk '/"characater"/ sub(""characater"", ""char" ++n """, $0) 1' file.json |
jq -r '.frames.frame.lps.lp|.characters|[.code_ascii,.confidence]|@tsv'


Result (given the data in the question):



1 97
5 89
4 97
5 97
1 77
B 97
B 94
L 34


If you control the generation of the JSON document, you should consider turning the characters object into a proper array.







share|improve this answer













share|improve this answer



share|improve this answer











answered Jul 13 at 7:47









Kusalananda

101k13199312




101k13199312











  • This only prints two lines: 1 97 & L 34 i.e. the characater to charN only worked for one replacement
    – cerr
    Jul 13 at 14:23







  • 1




    @cerr Odd, I am using the data that you provided without modifications... Did you copy and past my awk code correctly?
    – Kusalananda
    Jul 13 at 14:31











  • Try: drive.google.com/file/d/18wCzjMBpw7SIeVFByAGPQiqCBjg_0te3/…
    – cerr
    Jul 13 at 14:35







  • 1




    @cerr Ah! It's all on one line. In that case, change sub() to gsub(). No, sorry scrap that. n would not increment. Hold on, I need to doodle.
    – Kusalananda
    Jul 13 at 14:36







  • 2




    @cerr Yes, you can, with GNU sed put a newline after each }, with sed 's/},/},n/g' and then pass it through the awk and jq.
    – Kusalananda
    Jul 13 at 15:05
















  • This only prints two lines: 1 97 & L 34 i.e. the characater to charN only worked for one replacement
    – cerr
    Jul 13 at 14:23







  • 1




    @cerr Odd, I am using the data that you provided without modifications... Did you copy and past my awk code correctly?
    – Kusalananda
    Jul 13 at 14:31











  • Try: drive.google.com/file/d/18wCzjMBpw7SIeVFByAGPQiqCBjg_0te3/…
    – cerr
    Jul 13 at 14:35







  • 1




    @cerr Ah! It's all on one line. In that case, change sub() to gsub(). No, sorry scrap that. n would not increment. Hold on, I need to doodle.
    – Kusalananda
    Jul 13 at 14:36







  • 2




    @cerr Yes, you can, with GNU sed put a newline after each }, with sed 's/},/},n/g' and then pass it through the awk and jq.
    – Kusalananda
    Jul 13 at 15:05















This only prints two lines: 1 97 & L 34 i.e. the characater to charN only worked for one replacement
– cerr
Jul 13 at 14:23





This only prints two lines: 1 97 & L 34 i.e. the characater to charN only worked for one replacement
– cerr
Jul 13 at 14:23





1




1




@cerr Odd, I am using the data that you provided without modifications... Did you copy and past my awk code correctly?
– Kusalananda
Jul 13 at 14:31





@cerr Odd, I am using the data that you provided without modifications... Did you copy and past my awk code correctly?
– Kusalananda
Jul 13 at 14:31













Try: drive.google.com/file/d/18wCzjMBpw7SIeVFByAGPQiqCBjg_0te3/…
– cerr
Jul 13 at 14:35





Try: drive.google.com/file/d/18wCzjMBpw7SIeVFByAGPQiqCBjg_0te3/…
– cerr
Jul 13 at 14:35





1




1




@cerr Ah! It's all on one line. In that case, change sub() to gsub(). No, sorry scrap that. n would not increment. Hold on, I need to doodle.
– Kusalananda
Jul 13 at 14:36





@cerr Ah! It's all on one line. In that case, change sub() to gsub(). No, sorry scrap that. n would not increment. Hold on, I need to doodle.
– Kusalananda
Jul 13 at 14:36





2




2




@cerr Yes, you can, with GNU sed put a newline after each }, with sed 's/},/},n/g' and then pass it through the awk and jq.
– Kusalananda
Jul 13 at 15:05




@cerr Yes, you can, with GNU sed put a newline after each }, with sed 's/},/},n/g' and then pass it through the awk and jq.
– Kusalananda
Jul 13 at 15:05












up vote
1
down vote













Given your input isn't valid JSON you'll likely have to go with a sed, awk, grep type of solution. To that end the following can deal with input where it's been 'minified' into a single string:



$ grep -oP '"code_ascii":"\w+","confidence":"\w+"' <FILE> | grep -oP '(?<=:)"\w+"' | paste - -


Example



$ grep -oP '"code_ascii":"\w+","confidence":"\w+"' b.json | grep -oP '(?<=:)"\w+"' | paste - -
"1", "97"
"5", "89"
"4", "97"
"5", "97"
"1", "77"
"B", "97"
"B", "94"
"L", "34"
"J", "57"
"J", "57"
"4", "7"
"1", "97"
"5", "89"
"4", "97"
"5", "97"
"1", "77"
"B", "97"
"B", "94"
"L", "34"


Alternatives



NOTE: These work with multi-line input data.



sed+paste

$ sed -n '/code_ascii/,/confidence/p' a.json | sed 's/.*: [^"]*//' | paste - -


sed,awk,paste

$ sed -n '/code_ascii/,/confidence/p' a.json | awk -F': ' '{print $2}' | paste -d" " - -


sed,grep,paste

$ sed -n '/code_ascii/,/confidence/p' a.json | grep -oP '(?<=: ).*$' | paste - -


grep+paste

$ grep -oP '"code_ascii":"\w+","confidence":"\w+"' b.json | grep -oP '(?<=:)"\w+"' | paste - -





share|improve this answer























  • Get it here: txt.do/dzp91
    – cerr
    Jul 13 at 4:53










  • @cerr - cool, I'll make another alt
    – slm♦
    Jul 13 at 5:09










  • @cerr - try the grep+paste one
    – slm♦
    Jul 13 at 5:24














up vote
1
down vote













Given your input isn't valid JSON you'll likely have to go with a sed, awk, grep type of solution. To that end the following can deal with input where it's been 'minified' into a single string:



$ grep -oP '"code_ascii":"w+","confidence":"w+"' <FILE> | grep -oP '(?<=:)"w+"' | paste - -


Example



$ grep -oP '"code_ascii":"w+","confidence":"w+"' b.json | grep -oP '(?<=:)"w+"' | paste - -
"1", "97"
"5", "89"
"4", "97"
"5", "97"
"1", "77"
"B", "97"
"B", "94"
"L", "34"
"J", "57"
"J", "57"
"4", "7"
"1", "97"
"5", "89"
"4", "97"
"5", "97"
"1", "77"
"B", "97"
"B", "94"
"L", "34"


Alternatives



NOTE: These work with multi-line input data.



sed+paste

$ sed -n '/code_ascii/,/confidence/p' a.json | sed 's/.*: [^"]*//' | paste - -


sed,awk,paste

$ sed -n '/code_ascii/,/confidence/p' a.json | awk -F': ' 'print $2' | paste -d" " - -


sed,grep,paste

$ sed -n '/code_ascii/,/confidence/p' a.json | grep -oP '(?<=: ).*$' | paste - -


grep+paste

$ grep -oP '"code_ascii":"w+","confidence":"w+"' b.json | grep -oP '(?<=:)"w+"' | paste - -





share|improve this answer























  • Get it here: txt.do/dzp91
    – cerr
    Jul 13 at 4:53










  • @cerr - cool, I'll make another alt
    – slm♦
    Jul 13 at 5:09










  • @cerr - try the grep+paste one
    – slm♦
    Jul 13 at 5:24












up vote
1
down vote










up vote
1
down vote









Given your input isn't valid JSON you'll likely have to go with a sed, awk, grep type of solution. To that end the following can deal with input where it's been 'minified' into a single string:



$ grep -oP '"code_ascii":"w+","confidence":"w+"' <FILE> | grep -oP '(?<=:)"w+"' | paste - -


Example



$ grep -oP '"code_ascii":"w+","confidence":"w+"' b.json | grep -oP '(?<=:)"w+"' | paste - -
"1", "97"
"5", "89"
"4", "97"
"5", "97"
"1", "77"
"B", "97"
"B", "94"
"L", "34"
"J", "57"
"J", "57"
"4", "7"
"1", "97"
"5", "89"
"4", "97"
"5", "97"
"1", "77"
"B", "97"
"B", "94"
"L", "34"


Alternatives



NOTE: These work with multi-line input data.



sed+paste

$ sed -n '/code_ascii/,/confidence/p' a.json | sed 's/.*: [^"]*//' | paste - -


sed,awk,paste

$ sed -n '/code_ascii/,/confidence/p' a.json | awk -F': ' 'print $2' | paste -d" " - -


sed,grep,paste

$ sed -n '/code_ascii/,/confidence/p' a.json | grep -oP '(?<=: ).*$' | paste - -


grep+paste

$ grep -oP '"code_ascii":"w+","confidence":"w+"' b.json | grep -oP '(?<=:)"w+"' | paste - -





share|improve this answer















Given your input isn't valid JSON you'll likely have to go with a sed, awk, grep type of solution. To that end the following can deal with input where it's been 'minified' into a single string:



$ grep -oP '"code_ascii":"w+","confidence":"w+"' <FILE> | grep -oP '(?<=:)"w+"' | paste - -


Example



$ grep -oP '"code_ascii":"w+","confidence":"w+"' b.json | grep -oP '(?<=:)"w+"' | paste - -
"1", "97"
"5", "89"
"4", "97"
"5", "97"
"1", "77"
"B", "97"
"B", "94"
"L", "34"
"J", "57"
"J", "57"
"4", "7"
"1", "97"
"5", "89"
"4", "97"
"5", "97"
"1", "77"
"B", "97"
"B", "94"
"L", "34"


Alternatives



NOTE: These work with multi-line input data.



sed+paste

$ sed -n '/code_ascii/,/confidence/p' a.json | sed 's/.*: [^"]*//' | paste - -


sed,awk,paste

$ sed -n '/code_ascii/,/confidence/p' a.json | awk -F': ' 'print $2' | paste -d" " - -


sed,grep,paste

$ sed -n '/code_ascii/,/confidence/p' a.json | grep -oP '(?<=: ).*$' | paste - -


grep+paste

$ grep -oP '"code_ascii":"w+","confidence":"w+"' b.json | grep -oP '(?<=:)"w+"' | paste - -






share|improve this answer















share|improve this answer



share|improve this answer








edited Jul 13 at 5:40


























answered Jul 13 at 3:48









slm♦

233k65479650




233k65479650











  • Get it here: txt.do/dzp91
    – cerr
    Jul 13 at 4:53










  • @cerr - cool, I'll make another alt
    – slm♦
    Jul 13 at 5:09










  • @cerr - try the grep+paste one
    – slm♦
    Jul 13 at 5:24
















  • Get it here: txt.do/dzp91
    – cerr
    Jul 13 at 4:53










  • @cerr - cool, I'll make another alt
    – slm♦
    Jul 13 at 5:09










  • @cerr - try the grep+paste one
    – slm♦
    Jul 13 at 5:24















Get it here: txt.do/dzp91
– cerr
Jul 13 at 4:53




Get it here: txt.do/dzp91
– cerr
Jul 13 at 4:53












@cerr - cool, I'll make another alt
– slm♦
Jul 13 at 5:09




@cerr - cool, I'll make another alt
– slm♦
Jul 13 at 5:09












@cerr - try the grep+paste one
– slm♦
Jul 13 at 5:24




@cerr - try the grep+paste one
– slm♦
Jul 13 at 5:24










up vote
1
down vote













grep + sed + tr :



grep -e code_ascii -e confidence <file> | sed 's/.*: "\(.*\)"/\1/;' | tr "\n" ' '


Return :



20, 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 J, 57 J, 57 4, 7 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 


This command returns only the code_ascii + confidence pairs:



 grep -e code_ascii -e confidence <file> | sed -n 's/.*code_ascii": "\(.*\)"/\1/;T;h;n;s/.*"confidence": "\(.*\)"/\1/;T;H;g;p' | tr "\n" " " 


Return :



 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 J, 57 J, 57 4, 7 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 


Now, see Kusalananda's post above. With just sed + tr:



sed -i 's/},/},\n/g' <tempfile>
sed -n 's/characater//;T;:z;n;s/.*code_ascii": "\(.*\)"/\1/;Tz;:zz;h;n;s/.*"confidence": "\(.*\)"/\1/;Tzz;H;s/$//;Tz;g;p' <tempfile> | tr "\n" " "


Return :



1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 





share|improve this answer























  • Hi alux, no, for me this prints the whole JSON string. (PS: I had my filename inserted at the right spot - it's not working for me)
    – cerr
    Jul 13 at 4:48











  • bizare your file code, on my debian (904), just sed -n '/code_ascii/,/confidence/p' tmpfile block my consol. crtl-c oblige. Good luck
    – alux
    Jul 13 at 5:03










  • sed -n '/code_ascii/,/confidence/p' tmpfile will print the whole line
    – cerr
    Jul 13 at 5:05










  • As written above grep -e code_ascii -e confidence tempfile | sed 's/.*: "(.*)"/1/;' | tr "n" ' works but not a simple sed -n '/code_ascii/,/confidence/p' tmpfile ???
    – alux
    Jul 13 at 5:13










  • He doesn't want the first 20 you're showing in your output.
    – slm♦
    Jul 13 at 5:40














up vote
1
down vote













grep + sed + tr :



grep -e code_ascii -e confidence <file> | sed 's/.*: "\(.*\)"/\1/;' | tr "\n" ' '


Return :



20, 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 J, 57 J, 57 4, 7 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 


This command returns only the code_ascii + confidence pairs:



 grep -e code_ascii -e confidence <file> | sed -n 's/.*code_ascii": "\(.*\)"/\1/;T;h;n;s/.*"confidence": "\(.*\)"/\1/;T;H;g;p' | tr "\n" " " 


Return :



 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 J, 57 J, 57 4, 7 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 


Now, see Kusalananda's post above. With just sed + tr:



sed -i 's/},/},\n/g' <tempfile>
sed -n 's/characater//;T;:z;n;s/.*code_ascii": "\(.*\)"/\1/;Tz;:zz;h;n;s/.*"confidence": "\(.*\)"/\1/;Tzz;H;s/$//;Tz;g;p' <tempfile> | tr "\n" " "


Return :



1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 





share|improve this answer























  • Hi alux, no, for me this prints the whole JSON string. (PS: I had my filename inserted at the right spot - it's not working for me)
    – cerr
    Jul 13 at 4:48











  • bizare your file code, on my debian (904), just sed -n '/code_ascii/,/confidence/p' tmpfile block my consol. crtl-c oblige. Good luck
    – alux
    Jul 13 at 5:03










  • sed -n '/code_ascii/,/confidence/p' tmpfile will print the whole line
    – cerr
    Jul 13 at 5:05










  • As written above grep -e code_ascii -e confidence tempfile | sed 's/.*: "(.*)"/1/;' | tr "n" ' works but not a simple sed -n '/code_ascii/,/confidence/p' tmpfile ???
    – alux
    Jul 13 at 5:13










  • He doesn't want the first 20 you're showing in your output.
    – slm♦
    Jul 13 at 5:40












up vote
1
down vote










up vote
1
down vote









grep + sed + tr :



grep -e code_ascii -e confidence <file> | sed 's/.*: "\(.*\)"/\1/;' | tr "\n" ' '


Return :



20, 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 J, 57 J, 57 4, 7 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 


This command returns only the code_ascii + confidence pairs:



 grep -e code_ascii -e confidence <file> | sed -n 's/.*code_ascii": "\(.*\)"/\1/;T;h;n;s/.*"confidence": "\(.*\)"/\1/;T;H;g;p' | tr "\n" " " 


Return :



 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 J, 57 J, 57 4, 7 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 


Now, see Kusalananda's post above. With just sed + tr:



sed -i 's/},/},\n/g' <tempfile>
sed -n 's/characater//;T;:z;n;s/.*code_ascii": "\(.*\)"/\1/;Tz;:zz;h;n;s/.*"confidence": "\(.*\)"/\1/;Tzz;H;s/$//;Tz;g;p' <tempfile> | tr "\n" " "


Return :



1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 





share|improve this answer















grep + sed + tr :



grep -e code_ascii -e confidence <file> | sed 's/.*: "\(.*\)"/\1/;' | tr "\n" ' '


Return :



20, 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 J, 57 J, 57 4, 7 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 


This command returns only the code_ascii + confidence pairs:



 grep -e code_ascii -e confidence <file> | sed -n 's/.*code_ascii": "\(.*\)"/\1/;T;h;n;s/.*"confidence": "\(.*\)"/\1/;T;H;g;p' | tr "\n" " " 


Return :



 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 J, 57 J, 57 4, 7 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 


Now, see Kusalananda's post above. With just sed + tr:



sed -i 's/},/},\n/g' <tempfile>
sed -n 's/characater//;T;:z;n;s/.*code_ascii": "\(.*\)"/\1/;Tz;:zz;h;n;s/.*"confidence": "\(.*\)"/\1/;Tzz;H;s/$//;Tz;g;p' <tempfile> | tr "\n" " "


Return :



1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 






share|improve this answer















share|improve this answer



share|improve this answer








edited Jul 13 at 19:18


























answered Jul 13 at 4:40









alux

364




364











  • Hi alux, no, for me this prints the whole JSON string. (PS: I had my filename inserted at the right spot - it's not working for me)
    – cerr
    Jul 13 at 4:48











  • bizare your file code, on my debian (904), just sed -n '/code_ascii/,/confidence/p' tmpfile block my consol. crtl-c oblige. Good luck
    – alux
    Jul 13 at 5:03










  • sed -n '/code_ascii/,/confidence/p' tmpfile will print the whole line
    – cerr
    Jul 13 at 5:05










  • As written above grep -e code_ascii -e confidence tempfile | sed 's/.*: "(.*)"/1/;' | tr "n" ' works but not a simple sed -n '/code_ascii/,/confidence/p' tmpfile ???
    – alux
    Jul 13 at 5:13










  • He doesn't want the first 20 you're showing in your output.
    – slm♦
    Jul 13 at 5:40
















  • Hi alux, no, for me this prints the whole JSON string. (PS: I had my filename inserted at the right spot - it's not working for me)
    – cerr
    Jul 13 at 4:48











  • bizare your file code, on my debian (904), just sed -n '/code_ascii/,/confidence/p' tmpfile block my consol. crtl-c oblige. Good luck
    – alux
    Jul 13 at 5:03










  • sed -n '/code_ascii/,/confidence/p' tmpfile will print the whole line
    – cerr
    Jul 13 at 5:05










  • As written above grep -e code_ascii -e confidence tempfile | sed 's/.*: "(.*)"/1/;' | tr "n" ' works but not a simple sed -n '/code_ascii/,/confidence/p' tmpfile ???
    – alux
    Jul 13 at 5:13










  • He doesn't want the first 20 you're showing in your output.
    – slm♦
    Jul 13 at 5:40















Hi alux, no, for me this prints the whole JSON string. (PS: I had my filename inserted at the right spot - it's not working for me)
– cerr
Jul 13 at 4:48





Hi alux, no, for me this prints the whole JSON string. (PS: I had my filename inserted at the right spot - it's not working for me)
– cerr
Jul 13 at 4:48













bizare your file code, on my debian (904), just sed -n '/code_ascii/,/confidence/p' tmpfile block my consol. crtl-c oblige. Good luck
– alux
Jul 13 at 5:03




bizare your file code, on my debian (904), just sed -n '/code_ascii/,/confidence/p' tmpfile block my consol. crtl-c oblige. Good luck
– alux
Jul 13 at 5:03












sed -n '/code_ascii/,/confidence/p' tmpfile will print the whole line
– cerr
Jul 13 at 5:05




sed -n '/code_ascii/,/confidence/p' tmpfile will print the whole line
– cerr
Jul 13 at 5:05












As written above grep -e code_ascii -e confidence tempfile | sed 's/.*: "(.*)"/1/;' | tr "n" ' works but not a simple sed -n '/code_ascii/,/confidence/p' tmpfile ???
– alux
Jul 13 at 5:13




As written above grep -e code_ascii -e confidence tempfile | sed 's/.*: "(.*)"/1/;' | tr "n" ' works but not a simple sed -n '/code_ascii/,/confidence/p' tmpfile ???
– alux
Jul 13 at 5:13












He doesn't want the first 20 you're showing in your output.
– slm♦
Jul 13 at 5:40




He doesn't want the first 20 you're showing in your output.
– slm♦
Jul 13 at 5:40












 

draft saved


draft discarded


























 


draft saved


draft discarded














StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2funix.stackexchange.com%2fquestions%2f454998%2fextract-data-from-json-string%23new-answer', 'question_page');

);

Post as a guest













































































Popular posts from this blog

Peggy Mitchell

The Forum (Inglewood, California)

Palaiologos