No way to compare when there are fewer than two revisions.
Differences
This shows you the differences between two versions of the page.
— | 01_30_2019 [2019/02/01 03:05] (current) – created cynthia.kahn | ||
---|---|---|---|
Line 1: | Line 1: | ||
+ | <file python tweetSanitization.py> | ||
+ | # | ||
+ | # | ||
+ | import json | ||
+ | import time | ||
+ | # | ||
+ | fO = open(" | ||
+ | with open(' | ||
+ | cnt=1 | ||
+ | for cnt, line in enumerate(f): | ||
+ | tweet = json.loads(line) # load it as Python dict | ||
+ | # | ||
+ | try: | ||
+ | tweetCreated = time.strftime(' | ||
+ | except: | ||
+ | tweetCreated =' | ||
+ | try: | ||
+ | tweetID=str(tweet[" | ||
+ | except: | ||
+ | tweetID=' | ||
+ | try: | ||
+ | tweetText = str(tweet[" | ||
+ | except: | ||
+ | tweetText = ' | ||
+ | try: | ||
+ | display_text_range = str(tweet[" | ||
+ | except: | ||
+ | display_text_range = ' | ||
+ | try: | ||
+ | source = str(tweet[" | ||
+ | except: | ||
+ | source = ' | ||
+ | try: | ||
+ | truncated = str(tweet[" | ||
+ | except: | ||
+ | truncated= ' | ||
+ | try: | ||
+ | in_reply_to_status_id = str(tweet[" | ||
+ | except: | ||
+ | in_reply_to_status_id= ' | ||
+ | try: | ||
+ | in_reply_to_user_id = str(tweet[" | ||
+ | except: | ||
+ | in_reply_to_user_id= ' | ||
+ | try: | ||
+ | in_reply_to_screen_name = str(tweet[" | ||
+ | except: | ||
+ | in_reply_to_screen_name = ' | ||
+ | try: | ||
+ | tweetUser = str(tweet[" | ||
+ | except: | ||
+ | tweetUser = ' | ||
+ | try: | ||
+ | tweetGeoType = str(tweet[" | ||
+ | except: | ||
+ | tweetGeoType = ' | ||
+ | try: | ||
+ | tweetGeoCoord = str(tweet[" | ||
+ | except: | ||
+ | tweetGeoCoord = ' | ||
+ | try: | ||
+ | tweetCoordType = str(tweet[" | ||
+ | except: | ||
+ | tweetCoordType = ' | ||
+ | try: | ||
+ | tweetCoord = str(tweet[" | ||
+ | except: | ||
+ | tweetCoord = ' | ||
+ | try: | ||
+ | placeID = str(tweet[" | ||
+ | except: | ||
+ | placeID = ' | ||
+ | try: | ||
+ | placeURL = str(tweet[" | ||
+ | except: | ||
+ | placeURL = ' | ||
+ | try: | ||
+ | placeType = str(tweet[" | ||
+ | except: | ||
+ | placeType = ' | ||
+ | try: | ||
+ | placeCountry = str(tweet[" | ||
+ | except: | ||
+ | placeCountry = ' | ||
+ | try: | ||
+ | tweetPFullName = str(tweet[" | ||
+ | except: | ||
+ | tweetPFullName = ' | ||
+ | try: | ||
+ | tweetPCountryCode = str(tweet[" | ||
+ | except: | ||
+ | tweetPCountryCode =' | ||
+ | try: | ||
+ | tweetPname = str(tweet[" | ||
+ | except: | ||
+ | tweetPname = ' | ||
+ | try: | ||
+ | tweetPBBox1x= str(tweet[" | ||
+ | except: | ||
+ | tweetPBBox1x=' | ||
+ | try: | ||
+ | tweetPBBox1y= str(tweet[" | ||
+ | except: | ||
+ | tweetPBBox1y=' | ||
+ | try: | ||
+ | tweetPBBox2x= str(tweet[" | ||
+ | except: | ||
+ | tweetPBBox2x=' | ||
+ | try: | ||
+ | tweetPBBox2y= str(tweet[" | ||
+ | except: | ||
+ | tweetPBBox2y=' | ||
+ | try: | ||
+ | tweetPBBox3x= str(tweet[" | ||
+ | except: | ||
+ | tweetPBBox3x=' | ||
+ | try: | ||
+ | tweetPBBox3y= str(tweet[" | ||
+ | except: | ||
+ | tweetPBBox3y=' | ||
+ | try: | ||
+ | tweetPBBox4x= str(tweet[" | ||
+ | except: | ||
+ | tweetPBBox4x=' | ||
+ | try: | ||
+ | tweetPBBox4y= str(tweet[" | ||
+ | except: | ||
+ | tweetPBBox4y=' | ||
+ | try: | ||
+ | placeAttributes = str(tweet[" | ||
+ | except: | ||
+ | placeAttributes = ' | ||
+ | try: | ||
+ | tweetContributors = str(tweet[" | ||
+ | except: | ||
+ | tweetContributors = ' | ||
+ | try: | ||
+ | is_quote_status= str(tweet[" | ||
+ | except: | ||
+ | is_quote_status= ' | ||
+ | try: | ||
+ | retweetCount= str(tweet[" | ||
+ | except: | ||
+ | retweetCount= ' | ||
+ | try: | ||
+ | favoriteCount= str(tweet[" | ||
+ | except: | ||
+ | favoriteCount= ' | ||
+ | try: | ||
+ | entitiesID= str(tweet[" | ||
+ | except: | ||
+ | entitiesID = ' | ||
+ | try: | ||
+ | entitiesMediaID= str(tweet[" | ||
+ | except: | ||
+ | entitiesMediaID = ' | ||
+ | try: | ||
+ | extendedEntitiesID= str(tweet[" | ||
+ | except: | ||
+ | extendedEntitiesID = ' | ||
+ | try: | ||
+ | favorited = str(tweet[" | ||
+ | except: | ||
+ | favorited = ' | ||
+ | try: | ||
+ | retweeted = (tweet[" | ||
+ | except: | ||
+ | retweeted = ' | ||
+ | try: | ||
+ | possibly_sensitive = (tweet[" | ||
+ | except: | ||
+ | possibly_sensitive = ' | ||
+ | try: | ||
+ | filter_level = (tweet[" | ||
+ | except: | ||
+ | filter_level = ' | ||
+ | try: | ||
+ | tweetLang = (tweet[" | ||
+ | except: | ||
+ | tweetLang = ' | ||
+ | try: | ||
+ | tweetTimestamp = (tweet[" | ||
+ | except: | ||
+ | tweetTimestamp = ' | ||
+ | try: | ||
+ | matchingRulesTag = (tweet[" | ||
+ | except: | ||
+ | matchingRulesTag = ' | ||
+ | try: | ||
+ | matchingRulesID = (tweet[" | ||
+ | except: | ||
+ | matchingRulesID = ' | ||
+ | fO.write(tweetCreated + " | ||
+ | + tweetID + " | ||
+ | + tweetText + " | ||
+ | + display_text_range + " | ||
+ | + source + " | ||
+ | + truncated + " | ||
+ | + in_reply_to_status_id + " | ||
+ | + in_reply_to_user_id + " | ||
+ | + in_reply_to_screen_name + " | ||
+ | + tweetUser + " | ||
+ | + tweetGeoType + " | ||
+ | + tweetGeoCoord + " | ||
+ | + tweetCoordType + " | ||
+ | + tweetCoord + " | ||
+ | + placeID + " | ||
+ | + placeURL + " | ||
+ | + placeType + " | ||
+ | + placeCountry + " | ||
+ | + tweetPFullName + " | ||
+ | + tweetPCountryCode + " | ||
+ | + tweetPname + " | ||
+ | + tweetPBBox1x +" | ||
+ | + tweetPBBox1y | ||
+ | + tweetPBBox2x + " | ||
+ | + tweetPBBox2y + " | ||
+ | + tweetPBBox3x + " | ||
+ | + tweetPBBox3y + " | ||
+ | + tweetPBBox4x + " | ||
+ | + tweetPBBox4y + " | ||
+ | + placeAttributes + " | ||
+ | + tweetContributors + " | ||
+ | + is_quote_status + " | ||
+ | + retweetCount + " | ||
+ | + favoriteCount + " | ||
+ | + entitiesID + " | ||
+ | + entitiesMediaID + " | ||
+ | + extendedEntitiesID + " | ||
+ | + favorited + " | ||
+ | + retweeted + " | ||
+ | + possibly_sensitive + " | ||
+ | + filter_level + " | ||
+ | + tweetLang + " | ||
+ | + tweetTimestamp + " | ||
+ | + matchingRulesTag + " | ||
+ | + matchingRulesID + " | ||
+ | | ||
+ | cnt +=1 | ||
+ | |||
+ | </ |