#!/usr/bin/python
"""Flatten a line-delimited JSON tweet dump into a tab-separated table.

Each non-blank line of ``tweets_ds.json`` is parsed as one JSON tweet and
written to ``main_table.txt`` as a single row of 45 tab-separated columns,
with no header row.  A field that is absent, ``null`` or unreachable is
written as the column's sentinel: ``NULL`` for textual columns and ``0`` for
identifier/counter columns.
"""
import json
import time

INPUT_PATH = "tweets_ds.json"
OUTPUT_PATH = "main_table.txt"

# Layout of ``created_at`` in the dump and the layout written to the table.
_CREATED_AT_INPUT = '%a %b %d %H:%M:%S +0000 %Y'
_CREATED_AT_OUTPUT = '%Y-%m-%d %H:%M:%S'

# Characters removed from the tweet text so it cannot break the row layout.
_TEXT_UNWANTED = ("\n", "\r", "\t", ";")

# Columns 22-29: x/y of the four corners of the place bounding box.
_BBOX_COLUMNS = tuple(
    (("place", "bounding_box", "coordinates", 0, corner, axis), "NULL", False)
    for corner in range(4)
    for axis in range(2)
)

# Columns 4-45 in output order, each as
# (path into the tweet, sentinel when missing, strip surrounding whitespace?).
# Columns 1-3 (created_at, id, text) need special handling and are built in
# tweet_to_row().
_COLUMNS = (
    (("display_text_range",), "NULL", True),
    (("source",), "NULL", False),
    (("truncated",), "NULL", False),
    (("in_reply_to_status_id",), "0", False),
    (("in_reply_to_user_id",), "0", False),
    (("in_reply_to_screen_name",), "NULL", True),
    (("user", "id"), "0", False),
    (("geo", "type"), "NULL", True),
    (("geo", "coordinates"), "NULL", True),
    (("coordinates", "type"), "NULL", True),
    (("coordinates", "coordinates"), "NULL", True),
    (("place", "id"), "NULL", True),
    (("place", "url"), "NULL", True),
    (("place", "place_type"), "NULL", True),
    (("place", "country"), "NULL", True),
    (("place", "full_name"), "NULL", True),
    (("place", "country_code"), "NULL", True),
    (("place", "name"), "NULL", True),
) + _BBOX_COLUMNS + (
    (("place", "attributes"), "NULL", True),
    # The original looked for this under "place", where the lookup always
    # failed.  NOTE(review): presumably the tweet-level "contributors" field
    # was intended -- confirm against the data.
    (("contributors",), "NULL", True),
    (("is_quote_status",), "NULL", False),
    (("retweet_count",), "0", False),
    (("favorite_count",), "0", False),
    # The next three containers, and "matching_rules" below, are addressed
    # with a string key; when the container is a list, _dig() reads the
    # first element.
    (("entities", "user_mentions", "id"), "0", False),
    (("entities", "media", "id"), "0", False),
    (("extended_entities", "media", "id"), "0", False),
    (("favorited",), "NULL", True),
    (("retweeted",), "NULL", True),
    (("possibly_sensitive",), "NULL", True),
    (("filter_level",), "NULL", True),
    (("lang",), "NULL", True),
    (("timestamp_ms",), "0", False),
    (("matching_rules", "tag"), "NULL", True),
    (("matching_rules", "id"), "0", False),
)


def _dig(node, path):
    """Follow *path* (dict keys / list indexes) down from *node*.

    When a string key is applied to a list, the lookup continues in the
    list's first element, so list-valued containers yield the value of their
    first entry.

    Raises KeyError, IndexError or TypeError when the path cannot be followed.
    """
    for step in path:
        if isinstance(node, list) and not isinstance(step, int):
            node = node[0]
        node = node[step]
    return node


def _field(tweet, path, default, strip):
    """Return the value at *path* as a string, or *default* if missing/null."""
    try:
        value = _dig(tweet, path)
    except (KeyError, IndexError, TypeError):
        return default
    if value is None:
        return default
    text = str(value)
    return text.strip() if strip else text


def _created_at(tweet):
    """Return ``created_at`` reformatted for the table, or ``NULL``."""
    try:
        parsed = time.strptime(tweet['created_at'], _CREATED_AT_INPUT)
    except (KeyError, TypeError, ValueError):
        return 'NULL'
    return time.strftime(_CREATED_AT_OUTPUT, parsed)


def _clean_text(tweet):
    """Return the tweet text without line breaks, tabs or semicolons."""
    text = _field(tweet, ("text",), None, False)
    if text is None:
        return 'NULL'
    for unwanted in _TEXT_UNWANTED:
        text = text.replace(unwanted, "")
    return text.strip()


def tweet_to_row(tweet):
    """Convert one decoded tweet into its list of 45 column strings.

    Args:
        tweet: the object decoded from one JSON line (normally a dict).

    Returns:
        A list of 45 strings in output-column order; missing, null or
        unreachable fields are replaced by ``NULL`` or ``0``.
    """
    row = [_created_at(tweet), _field(tweet, ("id",), "0", False), _clean_text(tweet)]
    row.extend(
        _field(tweet, path, default, strip) for path, default, strip in _COLUMNS
    )
    return row


def convert(in_path=INPUT_PATH, out_path=OUTPUT_PATH):
    """Write one tab-separated row to *out_path* per tweet line in *in_path*.

    Blank lines are skipped.  A line that is not valid JSON is not swallowed:
    the error raised by ``json.loads`` propagates to the caller.

    Returns:
        The number of rows written.
    """
    written = 0
    # Both files are closed on exit, including when a line fails to parse.
    with open(out_path, "w") as table, open(in_path, "r") as dump:
        for line in dump:
            if not line.strip():
                continue
            table.write("\t".join(tweet_to_row(json.loads(line))) + "\n")
            written += 1
    return written


if __name__ == "__main__":
    convert()