从JSON文件创建元组

奎斯奎德

我一直在分析JSON文件以使其在PostgreSQL中更容易使用，并且想知道将JSON字典解析为元组的最佳方法是什么？

例如，这两个变量的行如下所示：

首先

"attributes": {"RestaurantsTableService": false, "GoodForMeal": {"dessert": false, "latenight": false, "lunch": false, "dinner": false, "breakfast": false, "brunch": false}, "Alcohol": "none", "Caters": true, "HasTV": false, "RestaurantsGoodForGroups": true, "NoiseLevel": "quiet", "WiFi": "no", "RestaurantsAttire": "casual", "RestaurantsReservations": false, "OutdoorSeating": false, "BusinessAcceptsCreditCards": true, "RestaurantsPriceRange2": 1, "BikeParking": true, "RestaurantsDelivery": false, "Ambience": {"romantic": false, "intimate": false, "classy": false, "hipster": false, "divey": false, "touristy": false, "trendy": false, "upscale": false, "casual": false}, "RestaurantsTakeOut": true, "GoodForKids": true, "BusinessParking": {"garage": false, "street": false, "validated": false, "lot": false, "valet": false}}

第二：

"hours": {"Monday": "7:30-22:00", "Tuesday": "7:30-22:00", "Friday": "7:30-22:00", "Wednesday": "7:30-22:00", "Thursday": "7:30-22:00", "Sunday": "7:30-21:00", "Saturday": "7:30-22:00"}

我希望它们采用以下格式：

对于属性：

Attributes: [(RestaurantsTableService, False)(dessert, False)(latenight, False)(lunch, False)(dinner, False)(breakfast, False)(brunch, False)(Alcohol, none)(Caters, True)(HasTV, False)(RestaurantsGoodForGroups, True)(NoiseLevel, quiet)(WiFi, no)(RestaurantsAttire, casual)(RestaurantsReservations, False)(OutdoorSeating, False)(BusinessAcceptsCreditCards, True)(RestaurantsPriceRange2, 1)(BikeParking, True)(RestaurantsDelivery, False)(romantic, False)(intimate, False)(classy, False)(hipster, False)(divey, False)(touristy, False)(trendy, False)(upscale, False)(casual, False)(RestaurantsTakeOut, True)(GoodForKids, True)(garage, False)(street, False)(validated, False)(lot, False)(valet, False)]

对于营业时间（hours）：

Hours: [(Friday, 9:00,12:00)(Tuesday, 14:00,19:00)(Thursday, 14:00,19:00)(Wednesday, 14:30,17:00)(Monday, 14:30,17:00)]

到目前为止，这是我的代码。现在，我一直在尝试访问变量中每个字典的值。我能够遍历它们，但无法访问布尔值，整数或字符串值。

import json
import ast
import pandas as pd
from datetime import datetime
from collections import OrderedDict, defaultdict

def cleanStr4SQL(s):
    """Return *s* with single quotes turned into backticks and newlines into spaces."""
    # Step 1: swap every single quote for a backtick.
    without_quotes = s.replace("'", "`")
    # Step 2: collapse the text onto one line by replacing newlines with spaces.
    return without_quotes.replace("\n", " ")


def parseBusinessData():
    """Write the attribute and hour keys of each business record to ``business.txt``.

    Reads ``yelp_business.JSON`` from the current working directory (set the
    path accordingly if the data file lives elsewhere), expecting one JSON
    object per line. For every record one output line is written containing
    ``str()`` of the list of top-level ``attributes`` keys, a tab, ``str()``
    of the list of ``hours`` keys, and a trailing tab. The number of records
    processed is printed when the input is exhausted.

    Only the keys are emitted; the values (including nested dictionaries)
    are not written.

    Raises:
        OSError: if either file cannot be opened.
        json.JSONDecodeError: if a line is not valid JSON.
        KeyError: if a record has no ``attributes`` or ``hours`` entry.
    """
    count_line = 0
    # Both files share one ``with`` statement so the output file is closed
    # even when parsing a record raises part-way through the input.
    with open('yelp_business.JSON', 'r') as f, open('business.txt', 'w') as outfile:
        # Read each JSON object and extract its data.
        for line in f:
            data = json.loads(line)
            # Iterating a dict yields its keys, so list() collects the key names.
            outfile.write(str(list(data['attributes'])) + '\t')
            outfile.write(str(list(data['hours'])) + '\t')
            outfile.write('\n')
            count_line += 1
    print(count_line)

def attributes(val):
    """Map a boolean-like value to 0 or 1; return None for anything else.

    The comparison uses ``==`` rather than identity, so values that compare
    equal to ``False``/``True`` (such as the integers 0 and 1) are mapped too.
    """
    # Checked in order: the False case first, then the True case.
    for flag, code in ((False, 0), (True, 1)):
        if val == flag:
            return code
    return None

如果您还有其他问题或疑虑，请告诉我。非常感谢任何建议。

感谢您的阅读。

马克·托洛宁

据我对问题的理解，您似乎想把嵌套字典中的属性展平，并生成一种特定格式的字符串。

这是你想要的吗？

import json

def flatten(D, key):
    """Return the entries of ``D[key]`` as a list of ``(name, value)`` tuples.

    Entries whose value is itself a dict are replaced, in place, by that
    inner dict's own ``(name, value)`` pairs; the outer name is dropped.
    Only one level of nesting is expanded.
    """
    pairs = []
    for name, value in D[key].items():
        if not isinstance(value, dict):
            pairs.append((name, value))
            continue
        # Nested dict: contribute its items instead of the dict itself.
        pairs.extend(value.items())
    return pairs

# Sample record holding the nested "attributes" dictionary from the question.
att_json = '{"attributes": {"RestaurantsTableService": false, "GoodForMeal": {"dessert": false, "latenight": false, "lunch": false, "dinner": false, "breakfast": false, "brunch": false}, "Alcohol": "none", "Caters": true, "HasTV": false, "RestaurantsGoodForGroups": true, "NoiseLevel": "quiet", "WiFi": "no", "RestaurantsAttire": "casual", "RestaurantsReservations": false, "OutdoorSeating": false, "BusinessAcceptsCreditCards": true, "RestaurantsPriceRange2": 1, "BikeParking": true, "RestaurantsDelivery": false, "Ambience": {"romantic": false, "intimate": false, "classy": false, "hipster": false, "divey": false, "touristy": false, "trendy": false, "upscale": false, "casual": false}, "RestaurantsTakeOut": true, "GoodForKids": true, "BusinessParking": {"garage": false, "street": false, "validated": false, "lot": false, "valet": false}}}'
att = json.loads(att_json)
att_list = flatten(att, 'attributes')
# Each pair is rendered as "(key, value)" and the pairs are joined with no separator.
s = 'Attributes: [{}]'.format(
    ''.join('({}, {})'.format(k, v) for k, v in att_list)
)
print(s)

# Sample record holding the "hours" dictionary from the question.
hours_json = '{"hours": {"Monday": "7:30-22:00", "Tuesday": "7:30-22:00", "Friday": "7:30-22:00", "Wednesday": "7:30-22:00", "Thursday": "7:30-22:00", "Sunday": "7:30-21:00", "Saturday": "7:30-22:00"}}'
hours = json.loads(hours_json)
hours_list = flatten(hours, 'hours')
# The "open-close" range is split by turning its dash into a comma.
s = 'Hours: [{}]'.format(
    ''.join('({}, {})'.format(k, v.replace('-', ',')) for k, v in hours_list)
)
print(s)

输出：

Attributes: [(RestaurantsTableService, False)(dessert, False)(latenight, False)(lunch, False)(dinner, False)(breakfast, False)(brunch, False)(Alcohol, none)(Caters, True)(HasTV, False)(RestaurantsGoodForGroups, True)(NoiseLevel, quiet)(WiFi, no)(RestaurantsAttire, casual)(RestaurantsReservations, False)(OutdoorSeating, False)(BusinessAcceptsCreditCards, True)(RestaurantsPriceRange2, 1)(BikeParking, True)(RestaurantsDelivery, False)(romantic, False)(intimate, False)(classy, False)(hipster, False)(divey, False)(touristy, False)(trendy, False)(upscale, False)(casual, False)(RestaurantsTakeOut, True)(GoodForKids, True)(garage, False)(street, False)(validated, False)(lot, False)(valet, False)]
Hours: [(Monday, 7:30,22:00)(Tuesday, 7:30,22:00)(Friday, 7:30,22:00)(Wednesday, 7:30,22:00)(Thursday, 7:30,22:00)(Sunday, 7:30,21:00)(Saturday, 7:30,22:00)]

本文收集自互联网，转载请注明来源。

如有侵权，请联系 [email protected] 删除。