#!/usr/bin/python
# coding=utf-8
#
# This script downloads the list of new gTLDs from ICANN and formats it into
# the PSL format, writing to stdout.

import urllib2
import csv
from time import gmtime, strftime

url = "https://newgtlds.icann.org/newgtlds.csv"

# Fetch the CSV.  This only does cert validation with Python 2.7.9 and later.
response = urllib2.urlopen(url)
try:
    # Parse with backslash escaping and with quote doubling disabled.
    csvreader = csv.reader(response, doublequote=False, escapechar='\\')

    # Section header: two blank lines, a provenance comment carrying the
    # source URL and a UTC timestamp, then one more blank line.
    # A parenthesised single-argument print produces the same output as the
    # bare print statement on Python 2 and remains valid syntax on Python 3.
    print("\n")
    print("// List of new gTLDs imported from " + url +
          " on %s" % strftime("%Y-%m-%dT%H:%M:%SZ", gmtime()))
    print("")

    # Skip first two lines (datestamp and field definitions)
    next(csvreader)
    next(csvreader)

    # CSV format:
    # tld,u-label,registry-operator,date-of-contract-signature,application-id,delegation-date
    # xn--hxt814e,网店,"Zodiac Libra Limited",2014-05-15,1-858-36255,2014-12-02
    for row in csvreader:
        # csv.reader yields an empty list for a blank line (e.g. a stray
        # newline at the end of the download); there is nothing to emit.
        # Non-empty rows that are too short still raise IndexError so that
        # malformed data is not silently turned into a bogus entry.
        if not row:
            continue

        tld = row[0].strip()
        contract_date = row[3].strip()
        registry = row[2].strip()
        # Use the u-label as the rule when one is given; fall back to the
        # tld column when the u-label is empty or only whitespace.
        ulabel = row[1].strip() or tld

        # PSL format:
        # // xn--hxt814e : 2014-05-15 Zodiac Libra Limited
        # 网店
        #
        line = "// %s : %s" % (tld, contract_date)
        if registry:
            line = line + " " + registry
        print(line + "\n" + ulabel + "\n")
finally:
    # Release the HTTP connection even if parsing fails part-way through.
    response.close()
