Summary
I'll be demonstrating Redis Search capabilities in a credit card transaction domain. All the data will be synthetically generated with the Faker module and stored as hashes in Redis. Redis Search will then be used to generate analytics on the data.
Architecture
Code Snippets
Data Generation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Custom Faker provider: once added to a Faker instance it is called as
# fake.merchants() and draws from the merchant names listed here.
merchants_provider = DynamicProvider(
    provider_name='merchants',
    elements=[
        'Walmart',
        'Nordstrom',
        'Amazon',
        'Exxon',
        'Kroger',
        'Safeway',
        'United Airlines',
        'Office Depot',
        'Ford',
        'Taco Bell',
    ],
)

# Custom Faker provider: called as fake.categories(), it draws from the
# four-letter expense category codes listed here.
categories_provider = DynamicProvider(
    provider_name='categories',
    elements=[
        'AUTO',
        'FOOD',
        'GASS',
        'GIFT',
        'TRAV',
        'GROC',
        'HOME',
        'PERS',
        'HEAL',
        'MISC',
    ],
)
def generate_data(client, count):
    """Write `count` synthetic credit card transactions to Redis as hashes.

    Both Faker and the `random` module are seeded with 0, so repeated runs
    request the same sequence of fake values.

    Args:
        client: Redis client; only its `hset` method is used here.
        count: Number of transaction records to generate and store.
    """
    Faker.seed(0)
    random.seed(0)
    generator = Faker()
    for provider in (merchants_provider, categories_provider):
        generator.add_provider(provider)

    for _ in range(count):
        # The generator calls below are made in a fixed order; with seeded
        # generators that order determines which value lands in which field.
        when = generator.date_time_between(start_date='-3y', end_date='now')
        acct_id = int(generator.ean(length=13))
        txn_id = int(generator.ean(length=13))
        # re.escape backslash-escapes punctuation such as the hyphens in the
        # ISO date; the sample query matches on the escaped form '2021\-12*'.
        # NOTE(review): presumably this keeps the date as a single token in
        # the text index -- confirm against the search tokenizer rules.
        escaped_date = re.escape(when.isoformat())
        epoch_seconds = time.mktime(when.timetuple())
        last_four = generator.credit_card_number()[-4:]
        amount = round(random.uniform(1, 1000), 2)
        category = generator.categories()
        merchant = generator.merchants()
        escaped_address = re.escape(generator.address())

        txn_record = {
            'acct_id': acct_id,
            'txn_id': txn_id,
            'txn_date': escaped_date,
            'txn_timestamp': epoch_seconds,
            'card_last_4': last_four,
            'txn_amt': amount,
            'txn_currency': 'USD',
            'expense_category': category,
            'merchant_name': merchant,
            'merchant_address': escaped_address,
        }
        # One hash per transaction, keyed by PREFIX followed by the txn id.
        client.hset(f'{PREFIX}{txn_id}', mapping=txn_record)
Index Creation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Drop any index left over from a previous run so the schema below is always
# applied from scratch.
try:
    client.ft(IDX_NAME).dropindex()
except Exception:
    # Deliberate best-effort: the drop is expected to fail when the index
    # does not exist yet (e.g. on the first run). Catching Exception rather
    # than using a bare `except:` lets KeyboardInterrupt and SystemExit
    # propagate instead of being silently swallowed.
    pass

# Index every hash whose key starts with PREFIX.
idx_def = IndexDefinition(index_type=IndexType.HASH, prefix=[PREFIX])
schema = [
    TagField('txn_id', sortable=True),
    TextField('txn_date'),
    NumericField('txn_timestamp', sortable=True),
    NumericField('txn_amt'),
    TagField('txn_currency'),
    TagField('expense_category'),
    TextField('merchant_name'),
    TextField('merchant_address'),
]
client.ft(IDX_NAME).create_index(schema, definition=idx_def)
Sample Query
The query below aggregates total spend by expense category for USD transactions with a value greater than $500 in December 2021, sorted by total spend in descending order.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Total spend per expense category for USD transactions with an amount above
# 500 whose txn_date starts with "2021-12", largest totals first.
#
# The query is a raw string so that "\-" reaches the server as a literal
# backslash-hyphen without relying on Python passing an invalid escape
# sequence through unchanged (deprecated; a SyntaxWarning since 3.12).
request = (
    AggregateRequest(
        r'(@txn_date:2021\-12* @txn_currency:{USD} @txn_amt:[(500, inf])'
    )
    .group_by(
        '@expense_category',
        reducers.sum('@txn_amt').alias('total_spend'),
    )
    .sort_by(Desc('@total_spend'))
)
result = client.ft(IDX_NAME).aggregate(request)
Source
Copyright ©1993-2024 Joey E Whelan, All rights reserved.