Script to infer database keys from data. – Tech Notes – Steve Schmidt
View this page on github to see the script with syntax highlighting and without formatting problems.
-- A SQL Server Transact-SQL script to infer the primary keys of and foreign key relationships among the
-- tables in a database based upon the data in the tables.
-- Copyright 2026 Stephen Schmidt. All rights reserved.
-- Contents of https://steve.czmyt.com/terms-and-conditions incorporated herein.
-- Columns being examined for key status.
create table [#columns]
(
[ID] smallint not null,
[Schema] nvarchar(128) not null,
[Table] nvarchar(128) not null,
[Column] nvarchar(128) not null
)
-- Hashed values of data in columns being examined.
create table [#values]
(
[Column ID] smallint not null,
[Hash] binary(64) not null,
[Count] int not null
)
-- Cursor to retrieve set of columns to be examined.
declare
[columns cursor]
cursor for select
[c].[TABLE_SCHEMA],
[c].[TABLE_NAME],
[c].[COLUMN_NAME]
from
[INFORMATION_SCHEMA].[COLUMNS] as [c]
join
[INFORMATION_SCHEMA].[TABLES] as [t] on
[t].[TABLE_CATALOG] = [c].[TABLE_CATALOG]
and
[t].[TABLE_SCHEMA] = [c].[TABLE_SCHEMA]
and
[t].[TABLE_NAME] = [c].[TABLE_NAME]
where
[t].[TABLE_TYPE] = 'BASE TABLE'
and
-- Ignore any tables whose name begins with a numeric digit
-- as they are usually snapshots or temporary tables.
[c].[TABLE_NAME] like '[A-Z]%'
and
-- Assume that primary and foreign key columns are of one of the following types:
[c].[DATA_TYPE] in ('bigint', 'binary', 'char', 'int', 'nchar', 'ntext', 'nvarchar', 'text',
'uniqueidentifier', 'varbinary', 'varchar')
and
-- Salesforce-specific exclusions.
[c].[COLUMN_NAME] not in ('CreatedById', 'Language', 'LastModifiedById', 'SortOrder')
-- Legacy system-specific exclusions.
and
[c].[TABLE_NAME] not like '% Changes %'
and
[c].[TABLE_NAME] not like '% Out'
and
[c].[COLUMN_NAME] not like 'inserted %'
and
[c].[COLUMN_NAME] not like 'updated %'
order by
[TABLE_SCHEMA],
[TABLE_NAME],
[ORDINAL_POSITION]
-- Hash and count the values in each of the columns selected for examination.
declare
@schema nvarchar(128),
@table nvarchar(128),
@column nvarchar(128),
@column_id smallint
select
@column_id = 0
open [columns cursor]
while 1 = 1
begin
fetch next from
[columns cursor]
into
@schema,
@table,
@column
if @@fetch_status != 0
break
select @column_id = @column_id + 1
insert into
[#columns]
values
(
@column_id,
@schema,
@table,
@column
)
-- Note: Would ideally prefer a better method of producing a hash that takes into account the collation
-- sequence in effect on the columns being examined than using "upper" to provide case insensitivity.
declare @sql varchar(4000)
select @sql = '
insert into
[#values]
select
' + cast(@column_id as varchar(6)) + ',
[Hash],
count(*)
from
(
select
hashbytes(''SHA2_512'', upper(cast([' + @column + '] as varchar(max)))) as [Hash]
from
[' + @schema + '].[' + @table + ']
where
[' + @column + '] is not null
) as [t]
group by
[Hash]
'
exec (@sql)
-- Uncomment for quick testing purposes:
--if @column_id >= 256 break
end
close [columns cursor]
deallocate [columns cursor]
-- Generate statistics on number of non-null values and number of unique values in each column.
select
[s].[Column ID],
[s].[Uniq Count],
[s].[Value Count],
[c].[Schema],
[c].[Table],
[c].[Column]
into
[#stats]
from
(
select
[Column ID],
-- Count of unique non-null values:
count(*) as [Uniq Count],
-- Count of non-null values:
sum([Count]) as [Value Count]
from
[#values]
group by
[Column ID]
) as [s]
join
[#columns] as [c] on
[c].[ID] = [s].[Column ID]
-- Determine potential primary key / foreign key combinations.
select
[p].[Schema] as [PK Schema],
[p].[Table] as [PK Table],
[p].[Column] as [PK Column],
[f].[Schema] as [FK Schema],
[f].[Table] as [FK Table],
[f].[Column] as [FK Column],
[p].[Uniq Count] as [PK Count],
[t].[PK Count] as [PK Used],
[t].[FK Count]
into
[#merge]
from
(
-- Count the number of matches that each combination of columns has in common.
select
[p].[Column ID] as [PK ID],
[f].[Column ID] as [FK ID],
sum([p].[Count]) as [PK Count],
sum([f].[Count]) as [FK Count]
from
[#values] as [p]
join
[#values] as [f] on
[f].[Hash] = [p].[Hash]
and
[f].[Column ID] != [p].[Column ID]
group by
[p].[Column ID],
[f].[Column ID]
) as [t]
join
[#stats] as [p] on
[p].[Column ID] = [t].[PK ID]
join
[#stats] as [f] on
[f].[Column ID] = [t].[FK ID]
where
-- The primary keys are those columns whose non-null values are unique among all rows.
[p].[Uniq Count] = [p].[Value Count]
and
-- The foreign keys are those columns whose non-null values are a subset of the values in the
-- primary key column. i.e. there are no values in the foreign key column for which there is
-- not a match in the primary key column.
[f].[Uniq Count] = [t].[PK Count]
-- Retrieve and display results of analysis.
select
*
from
[#merge]
order by
[PK Schema],
[PK Table],
[PK Column],
[FK Schema],
[FK Table],
[FK Column]
drop table [#merge]
drop table [#stats]
drop table [#values]
drop table [#columns]