# Source code for acscsv.snowflake

#!/usr/bin/env python
import sys
import re
import time
import datetime
import calendar

"""
From twitter code on github
 workerIdBits = 5L
 datacenterIdBits = 5L
 sequenceBits = 12L
 
 maxWorkerId = -1L ^ (-1L << workerIdBits)
 maxDatacenterId = -1L ^ (-1L << datacenterIdBits)

 sequenceMask = -1L ^ (-1L << sequenceBits)
 
 workerIdShift = sequenceBits
 datacenterIdShift = sequenceBits + workerIdBits
 timestampLeftShift = sequenceBits + workerIdBits + datacenterIdBits

 ((timestamp - twepoch) << timestampLeftShift) |
          (datacenterId << datacenterIdShift) |
              (workerId << workerIdShift) | 
               sequence
"""

# strftime pattern used to render a decoded id's time (from time.gmtime) as text.
FMT = "%Y-%m-%dT%H:%M:%S"
# Offset, in milliseconds, added to an id's timestamp field before it is divided
# by 1000 and handed to time.gmtime. Kept as a float so that division is never
# integer division.
TWEPOCH = 1288834974657.

# Widths, in bits, of the snowflake fields, ordered from the least significant
# field (sequence) to the most significant one (timestamp).
SF_SEQ_BITS = 12
SF_WORK_BITS = 5
SF_DC_BITS = 5
SF_TIME_BITS = 41
# Presumably the overall width of an id; not referenced by the code in this file.
SF_BITS = 64

# Matches a run of exactly 18 ASCII digits.
# NOTE(review): a longer digit run (e.g. a 19-digit id) matches on its first 18
# digits only -- confirm whether longer ids need to be supported.
nRE = re.compile("[0-9]{18}")

class Snowflake(object):
    """Decode the bit fields packed into a Twitter snowflake id.

    The id may be supplied as an int or as a string carrying leading and
    trailing characters or symbols. The first 18 consecutive digits found in
    ``str(id)`` are used as the id, so all of these are accepted::

        tag:search.twitter.com,2005:113733024721539072
        113733024721539072
        113733024721539072|This is the tweet for which you are looking|en

    When the input does not contain a valid snowflake id (no run of 18 digits,
    e.g. a shorter pre-snowflake twitter id, or a decoded year outside the
    accepted range), the input is passed through unchanged as ``id`` and every
    other field is set to None.

    Available fields::

        id           the id as an int (or the raw input when invalid)
        sequence     lowest 12 bits of the id
        worker       next 5 bits
        data_center  next 5 bits
        ts           next 41 bits: milliseconds relative to TWEPOCH
        sample_set   ts % 100
        timestamp    (ts + TWEPOCH) / 1000, i.e. seconds as a float
        timeStruct   time.gmtime(timestamp)
        timeString   timeStruct formatted with FMT
        year
        month
        day
        hour
        min
        sec
        dow          day of week (tm_wday)
        doy          day of year (tm_yday)

    """

    def __init__(self, id):
        """Create a new snowflake object from a string or number
        representation of an id.

        ``id`` is converted with ``str()`` and searched for 18-digit runs;
        only the first match is decoded.
        """
        matches = nRE.findall(str(id))
        valid = False
        if matches:
            # only process first matching id in string
            self.id = int(matches[0])
            work_shift = SF_SEQ_BITS
            dc_shift = work_shift + SF_WORK_BITS
            time_shift = dc_shift + SF_DC_BITS
            self.sequence = int(self.masked_id(SF_SEQ_BITS, 0))
            self.worker = int(self.masked_id(SF_WORK_BITS, work_shift))
            self.data_center = int(self.masked_id(SF_DC_BITS, dc_shift))
            self.ts = int(self.masked_id(SF_TIME_BITS, time_shift))
            self.sample_set = self.ts % 100
            # ts is in milliseconds; TWEPOCH is a float, so this is a float
            # number of seconds suitable for time.gmtime
            self.timestamp = (self.ts + TWEPOCH) / 1000.
            self.timeStruct = time.gmtime(self.timestamp)
            self.timeString = time.strftime(FMT, self.timeStruct)
            self.year = self.timeStruct.tm_year
            self.month = self.timeStruct.tm_mon
            self.day = self.timeStruct.tm_mday
            self.hour = self.timeStruct.tm_hour
            self.min = self.timeStruct.tm_min
            self.sec = self.timeStruct.tm_sec
            self.dow = self.timeStruct.tm_wday
            self.doy = self.timeStruct.tm_yday
            # Sanity check: accept only ids whose decoded year lies between
            # 2010 and next year according to the local clock.
            valid = 2010 <= self.year <= datetime.datetime.now().year + 1
        if not valid:
            # no valid snowflake id found
            self.id = id  # pass through input
            self.sequence = self.worker = self.data_center = None
            # sample_set is cleared too, so that no field is left missing
            # or holding a stale value from the decode attempt above.
            self.ts = self.sample_set = self.timestamp = None
            self.timeStruct = self.timeString = None
            self.year = self.month = self.day = None
            self.hour = self.min = self.sec = None
            self.dow = self.doy = None

    def masked_id(self, bits, pos):
        """Return the ``bits``-wide field of ``self.id`` that starts at bit
        offset ``pos`` (0 = least significant bit), shifted down so that the
        field's lowest bit is bit 0. Requires ``self.id`` to be an int.
        """
        mask = ((1 << bits) - 1) << pos
        return (mask & self.id) >> pos

    def get_id_datetime(self):
        """Return ``[id, timeString]`` for this object."""
        return [self.id, self.timeString]

    def __repr__(self):
        """Return a multi-line, human-readable dump of the main fields."""
        template = (
            "#" * 15 + "\n"
            "id:      %s\n"
            "seq:     %s\n"
            "worker:  %s\n"
            "DS:      %s\n"
            "Seconds: %s\n"
            "time:    %s\n"
        )
        # Formatting with an explicit tuple keeps this safe even when the
        # passed-through id is itself a tuple.
        return template % (
            self.id,
            self.sequence,
            self.worker,
            self.data_center,
            self.timestamp,
            self.timeString,
        )

if __name__ == "__main__":
    # Filter mode: read text from stdin, decode every 18-digit run found on
    # each line, and write one CSV row per decoded id to stdout.
    import csv
    columns = ("id", "sequence", "worker", "data_center",
               "timeString", "hour", "min", "sec")
    out = csv.writer(sys.stdout)
    for line in sys.stdin:
        candidates = nRE.findall(line)
        try:
            for digits in candidates:
                flake = Snowflake(digits)
                out.writerow([getattr(flake, name) for name in columns])
        except IndexError:
            # Report the candidates of the offending line and move on to the
            # next input line.
            sys.stderr.write("ERROR %s" % candidates)
# Navigation

# Source code for acscsv.snowflake

# Quick search

# Navigation