Cluster proteins with CD-HIT and write results to DuckDB

CDHIT2duckdb(
  duckdb_path,
  output_path,
  output_prefix = "cdhit_out",
  identity = 0.9,
  word_length = 5,
  threads = 0,
  memory = 0,
  extra_args = c("-g", "1")
)