-- Populate tmp_strain_gtype_final_2 with one row per (variation, strain pair)
-- where both genotypes have identical alleles (allele_1 = allele_2) but the
-- two strains carry different alleles from each other.
-- Both populations must be flagged is_strain = 1 and must be distinct.
-- INSERT IGNORE silently skips rows rejected by a unique key on the target.
INSERT IGNORE INTO tmp_strain_gtype_final_2
SELECT
    ig1.variation_id,
    CONCAT(ig1.allele_1, "/", ig2.allele_1) AS allele_string,
    ip1.population_sample_id AS sample_id1,
    ip2.population_sample_id AS sample_id2,
    s1.name AS sample_name1,
    s2.name AS sample_name2
FROM tmp_individual_genotype_single_bp AS ig1
INNER JOIN tmp_individual_genotype_single_bp AS ig2
    ON ig2.variation_id = ig1.variation_id
INNER JOIN individual_population AS ip1
    ON ip1.individual_sample_id = ig1.sample_id
INNER JOIN individual_population AS ip2
    ON ip2.individual_sample_id = ig2.sample_id
INNER JOIN population AS p1
    ON p1.sample_id = ip1.population_sample_id
INNER JOIN population AS p2
    ON p2.sample_id = ip2.population_sample_id
INNER JOIN sample AS s1
    ON s1.sample_id = p1.sample_id
INNER JOIN sample AS s2
    ON s2.sample_id = p2.sample_id
WHERE ig1.allele_1 = ig1.allele_2
    AND ig2.allele_1 = ig2.allele_2
    AND ig1.allele_1 != ig2.allele_1
    AND p1.sample_id != p2.sample_id
    AND p1.is_strain = 1
    AND p2.is_strain = 1
    -- NOTE(review): these five sample_ids are excluded on both sides; the
    -- reason is not recorded here -- confirm before reusing on another database.
    AND s1.sample_id NOT IN (177, 181, 201, 218, 233)
    AND s2.sample_id NOT IN (177, 181, 201, 218, 233);

It took 5 days on ecs1b. If I get rid of the index (unique key variation_idx(variation_id,sample_id1,sample_id2)), it generates bigger files and takes a similar number of days, so it is not worth it.

Similar SQL is used to generate tmp_strain_gtype_count_final; this took an hour or less(?). strain_gtype_final is generated in the same way:
-- Build strain_gtype_final: every column of tmp_strain_gtype_final plus the
-- count column from tmp_strain_gtype_count_final for the same
-- (sample_id1, sample_id2) pair.
-- Fix: the pasted statement had "c.sample_id1" split across two lines by
-- terminal wrapping, which made it unparseable.
CREATE TABLE strain_gtype_final
SELECT
    s.*,
    c.count
FROM tmp_strain_gtype_final AS s
INNER JOIN tmp_strain_gtype_count_final AS c
    ON s.sample_id1 = c.sample_id1
    AND s.sample_id2 = c.sample_id2;
-- Index strain_gtype_final on each strain-sample column and on variation_id.
-- Fix: the column name in the first index was split across two lines by
-- terminal wrapping, and the statement had no terminating semicolon.
ALTER TABLE strain_gtype_final
    ADD INDEX sample_id1 (sample_id1),
    ADD INDEX sample_id2 (sample_id2),
    ADD INDEX variation_id (variation_id);

took a few hours?

Use the following SQL to merge Sanger SNPs and dbSNP SNPs first:
-- Build tmp_ids1: pairs of variation ids whose variation_feature rows share
-- the same seq_region_id, start, end and allele_string, where the first row
-- has source_id = 1 and the second has source_id = 2.
-- NOTE(review): presumably source 1 is Sanger and source 2 is dbSNP -- confirm
-- against the source table.
-- Fix: removed the interactive "mysql>" prompt and rejoined identifiers that
-- terminal wrapping had split mid-word.
CREATE TABLE tmp_ids1
SELECT
    vf1.variation_id AS variation_id1,
    vf2.variation_id AS variation_id2
FROM variation_feature AS vf1
INNER JOIN variation_feature AS vf2
    ON vf1.seq_region_id = vf2.seq_region_id
    AND vf1.seq_region_start = vf2.seq_region_start
    AND vf1.seq_region_end = vf2.seq_region_end
    AND vf1.allele_string = vf2.allele_string
WHERE vf1.source_id = 1
    AND vf2.source_id = 2;

-- For each pair in tmp_ids1, record the name of variation_id2 as a synonym
-- (source_id 2) of variation_id1.
-- The join to v1 selects no columns but limits the insert to pairs whose
-- variation_id1 still exists in variation.
-- INSERT IGNORE silently skips rows rejected by a unique key on the target.
INSERT IGNORE INTO variation_synonym (variation_id, source_id, name)
SELECT
    tid.variation_id1,
    2,
    v2.name
FROM tmp_ids1 AS tid
INNER JOIN variation AS v1
    ON v1.variation_id = tid.variation_id1
INNER JOIN variation AS v2
    ON v2.variation_id = tid.variation_id2;

-- Index tmp_ids1 on variation_id2, the column the delete that follows joins on.
ALTER TABLE tmp_ids1
    ADD INDEX variation_id2x (variation_id2);

-- Remove from variation every row whose variation_id appears as variation_id2
-- in tmp_ids1. Rows in tmp_ids1 itself are left untouched.
-- Fix: "DELETE FROM variation v, tmp_ids1 t WHERE ..." is not valid MySQL;
-- a multi-table delete must name the target alias before FROM.
-- Destructive: run only after the variation_synonym insert has completed.
DELETE v
FROM variation AS v
INNER JOIN tmp_ids1 AS t
    ON v.variation_id = t.variation_id2;
Delete in the same way from variation_feature, flanking_sequence, allele, tmp_individual_genotype_single_bp, and transcript_variation.
