#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2021-04-28 19:35:53 +0300 (Wed, 28 Apr 2021) $
#$Revision: 8738 $
#$URL: svn+ssh://www.crystallography.net/home/coder/svn-repositories/cod-tools/tags/v3.8.1/scripts/oqmd2cif $
#------------------------------------------------------------------------------
#*
#* Retrieve structures from the Open Quantum Materials Database and
#* write them out in CIF format.
#*
#* USAGE:
#*    $0 --options structure1_id structure*_id
#**

use strict;
use warnings;
use DBI;
use COD::Cell qw( vectors2cell );
use COD::CIF::Tags::Print qw( print_single_tag_and_value );
use COD::Formulae::Parser::AdHoc;
use COD::SOptions qw( getOptions get_value );
use COD::SUsage qw( usage options );
use COD::ToolsVersion qw( get_version_string );
use JSON qw( decode_json );

# Conversion factor from eV per atom to J per mole of atoms: the
# elementary charge (1.602176634e-19 C) multiplied by the Avogadro
# constant (6.02214076e23 1/mol), i.e. numerically the Faraday constant.
my $eV_per_atom_to_J_per_mol = 96485.33212;

# Connection parameters and table names of the database that is queried;
# most of the scalar entries can be overridden from the command line.
# The 'dbh' slot is a placeholder for the database handle.
my %database = (
    platform => 'mysql',
    host     => 'localhost',
    name     => 'oqmd',
    user     => 'reader',
    password => '',
    dbh      => undef,
    tables   => {
        atoms                  => 'atoms',
        calculations           => 'calculations',
        calculations_meta_data => 'calculations_meta_data',
        formation_energies     => 'formation_energies',
        meta_data              => 'meta_data',
        sites                  => 'sites',
        spacegroups            => 'spacegroups',
        structures             => 'structures',
        wyckoffsites           => 'wyckoffsites',
    },
);

# Descriptions of the non-positive 'ismear' setting values. A 'main'
# entry is reported as the smearing method itself, an 'other' entry as a
# free-text description of the method.
my %smearing_methods = (
    '0'  => { main  => 'Gaussian' },
    '-1' => { main  => 'Fermi-Dirac' },
    '-2' => { other => 'partial occupancies are read in ' .
                       'from the WAVECAR or INCAR file' },
    '-3' => { other => 'loop over smearing-parameters supplied ' .
                       'in the INCAR file is performed' },
    '-4' => { other => 'tetrahedron method' },
    '-5' => { other => 'tetrahedron method with Bl\"ochl corrections' },
);

my $debug = 0;
my %options;
my $limit;

# Default printf formats for floating-point numbers (one for atomic
# coordinates, one for unit cell parameters): up to 12 significant
# digits each.
my ( $atom_fformat, $cell_fformat ) = ( '%.12g' ) x 2;

#* OPTIONS:
#*   -L, --limit 10, --limit 1000,10
#*                     Add the limit clause to the structure query statement
#*                     (default: all structures are output from the database).
#*
#*   -L-, --no-limit
#*                     Remove any previously set query limits.
#*
#*   -F, --float-format "%15.12f"
#*                     Specify format to print out floating point numbers.
#*
#*   -A, --atom-format "%15.12f"
#*                     Specify format to print out floating point numbers
#*                     for atomic coordinates.
#*
#*   -C, --cell-format "%15.12f"
#*                     Specify format to print out floating point numbers
#*                     for unit cell parameters.
#*
#*   --platform 'SQLite'
#*                     Use the SQL database platform 'SQLite' to query
#*                     structures (default: 'mysql').
#*
#*   -d, --database  oqmd
#*                     Use database "oqmd" to query for structures
#*                     (default: 'oqmd').
#*
#*   -h, --host   www.crystallography.net
#*   -s, --server www.crystallography.net
#*                     Query the OQMD database on the host
#*                     'www.crystallography.net' (default: 'localhost').
#*
#*   -l, --localhost
#*                     Use database server on the localhost to query the OQMD
#*                     database.
#*
#*   -p, --port 3306
#*                     Use the specified port to query structures
#*                     (default: 3306).
#*
#*   -t, --table  data
#*                     Use SQL table "data" to query for structures.
#*
#*   -u, --user reader
#*                     Use user name "reader" to access the OQMD database;
#*                     this user should be granted the SELECT privilege, i.e.
#*                     should be able to read the OQMD database, without
#*                     supplying a password.
#*
#*   --password
#*                     Use the specified password to connect (default: '').
#*
#*   --debug, --no-debug
#*                     Switch on or off debug printouts.
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
# Process the command line options. The list returned by getOptions()
# replaces @ARGV and is subsequently used as the list of structure ids.
# Scalar references are filled in with the option value; code references
# are invoked when the option is encountered.
@ARGV = getOptions(
    "-d,--database"  => \$database{name},
    "-l,--localhost" => sub { $database{host} = 'localhost' },
    "-h,--host"      => \$database{host},
    "-p,--port"      => \$database{port},
    "-s,--server"    => \$database{host},
    # The usage text documents this option as selecting the table that is
    # queried for structures. The queries read that name from
    # $database{tables}{structures}; the previously assigned
    # $database{table} was not read anywhere, making the option a no-op.
    "-t,--table"     => \$database{tables}{structures},
    "-u,--user"      => \$database{user},
    "--password"     => \$database{password},
    "--platform"     => \$database{platform},

    "-L,--limit"     => \$limit,
    "-L-,--no-limit" => sub { $limit = undef },

    # A single format option sets both the atom and the cell formats.
    "-F,--float-format" => sub { $atom_fformat = $cell_fformat = get_value() },
    "-A,--atom-format" => \$atom_fformat,
    "-C,--cell-format" => \$cell_fformat,

    "--debug"        => sub { $debug = 1 },
    "--no-debug"     => sub { $debug = 0 },

    '--options'      => sub { options; exit },
    '--help,--usage' => sub { usage; exit },
    '--version'      => sub { print get_version_string(), "\n"; exit }
);

# COD::Serialise is needed only for the debug printouts. A 'use' statement
# is executed at compile time regardless of any enclosing 'if', so it is
# stated unconditionally rather than inside a misleading 'if( $debug )'.
use COD::Serialise qw( serialiseRef );

my $connected_db = database_connect( \%database );

# Structure ids are taken from the command line; when none are given,
# the ids are fetched from the database (subject to the --limit option).
my @structure_ids = @ARGV;

if( !@structure_ids ) {
    @structure_ids = query_OQMD_structure_ids( $connected_db, $limit );
}

# Each structure is written out as a separate CIF data block.
for my $structure_id (@structure_ids) {
    print "data_", $structure_id, "\n";

    my $structure_data = query_OQMD_structure( $connected_db, $structure_id );

    serialiseRef( $structure_data ) if $debug;

    # Unit cell parameters are computed from the three stored vectors.
    my @cell_vectors = (
        [ $structure_data->{x1}, $structure_data->{x2}, $structure_data->{x3} ],
        [ $structure_data->{y1}, $structure_data->{y2}, $structure_data->{y3} ],
        [ $structure_data->{z1}, $structure_data->{z2}, $structure_data->{z3} ],
        );

    my @cell = vectors2cell( @cell_vectors );

    printf "%-32s ".$cell_fformat."\n", "_cell_length_a", $cell[0];
    printf "%-32s ".$cell_fformat."\n", "_cell_length_b", $cell[1];
    printf "%-32s ".$cell_fformat."\n", "_cell_length_c", $cell[2];
    printf "%-32s ".$cell_fformat."\n", "_cell_angle_alpha", $cell[3];
    printf "%-32s ".$cell_fformat."\n", "_cell_angle_beta",  $cell[4];
    printf "%-32s ".$cell_fformat."\n", "_cell_angle_gamma", $cell[5];

    # All atoms of the cell are listed explicitly below, therefore the
    # structure itself is always described in the P 1 space group.
    print_single_tag_and_value( '_chemical_formula_sum',
                                $structure_data->{composition_id} );
    print_single_tag_and_value( '_space_group_name_H-M_alt', 'P 1' );
    print_single_tag_and_value( '_space_group_name_Hall', ' P 1' );

    print "loop_\n";
    print "_space_group_symop_id\n";
    print "_space_group_symop_operation_xyz\n";
    print "1 x,y,z\n";

    # Get fractional atom coordinates from the OQMD database:

    my $atoms = query_OQMD_atoms( $connected_db, $structure_id );

    serialiseRef( $atoms ) if $debug;

    # Calculate Z:
    my $formula_parser = COD::Formulae::Parser::AdHoc->new;
    my $formula_atoms =
        $formula_parser->ParseString( $structure_data->{composition_id} );

    my $coord_atom_count = scalar(keys %{$atoms});
    if( $formula_atoms ) {
        # Z is the number of atoms in the cell divided by the number of
        # atoms in one formula unit, i.e. by the sum of the element counts
        # (not by the number of distinct elements).
        # NOTE(review): assumes the parser returns a hash of
        # element => count -- TODO confirm.
        my $formula_atom_count = 0;
        for my $element_count (values %{$formula_atoms}) {
            $formula_atom_count += $element_count;
        }
        # An empty formula would otherwise cause a division by zero.
        if( $formula_atom_count > 0 ) {
            print_single_tag_and_value( '_cell_formula_units_Z',
                                        $coord_atom_count/$formula_atom_count );
        }
    }

    print_single_tag_and_value( '_oqmd_database_code', $structure_id );
    print_single_tag_and_value( '_oqmd_structure_label',
                                $structure_data->{label} );

    # The space group recorded in the source database is reported under
    # separate '_oqmd_' data names.
    if( defined $structure_data->{spacegroup_id} ) {
        my $spacegroup_data =
            query_OQMD_spacegroup( $connected_db,
                                   $structure_data->{spacegroup_id} );
        serialiseRef( $spacegroup_data ) if $debug;
        print_single_tag_and_value( '_oqmd_space_group_name_H-M',
                                    $spacegroup_data->{hm} );
        print_single_tag_and_value( '_oqmd_space_group_name_Hall',
                                    $spacegroup_data->{hall} );
        print_single_tag_and_value( '_oqmd_space_group_crystal_system',
                                    lc($spacegroup_data->{lattice_system}) );
        print_single_tag_and_value( '_oqmd_space_group_IT_number',
                                    lc($spacegroup_data->{number}) );
    }

    # The calculation that has this structure as its output (if any):
    my $calculation = query_OQMD_calculation( $connected_db,
                                              $structure_id );

    if( $calculation ) {
        print_single_tag_and_value( '_oqmd_calculation_label',
                                    $calculation->{label} );

        # The input structure of the calculation is referenced as the
        # source of both the initial cell parameters and coordinates.
        if( $calculation->{input_id} ) {
            for my $part ('cell_param', 'coordinate') {
                print_single_tag_and_value(
                    "_tcod_initial_${part}_db_name",
                    'Open Quantum Materials Database' );
                print_single_tag_and_value(
                    "_tcod_initial_${part}_db_code",
                    $calculation->{input_id} );
                print_single_tag_and_value(
                    "_tcod_initial_${part}_URI",
                    'http://oqmd.org/materials/structure/' .
                    $calculation->{input_id} );
            }
        }

        print_single_tag_and_value( '_oqmd_calculation_converged',
                                    $calculation->{converged} ? 'yes' : 'no' );
        print_single_tag_and_value( '_tcod_total_energy',
                                    $calculation->{energy_pa} );
        print_single_tag_and_value( '_tcod_computation_wallclock_time',
                                    $calculation->{runtime} );

        # Fixing non-conforming JSON: pre-existing double quotes are
        # backslash-escaped first (the replacement needs a doubled
        # backslash, a plain '\"' would leave the text unchanged), then
        # the single-quoted strings are turned into double-quoted ones
        # and the capitalised booleans are lowercased.

        $calculation->{settings} =~ s/"/\\"/g;
        $calculation->{settings} =~ s/'/"/g;

        $calculation->{settings} =~ s/True/true/g;
        $calculation->{settings} =~ s/False/false/g;

        my $settings = decode_json( $calculation->{settings} );

        print_single_tag_and_value( '_dft_kinetic_energy_cutoff_wavefunctions',
                                    $settings->{encut} );
        print_single_tag_and_value( '_dft_cell_energy_conv',
                                    $settings->{ediff} );
        print_single_tag_and_value( '_dft_BZ_integration_smearing_width',
                                    $settings->{sigma} );

        # Positive 'ismear' values are reported as the Methfessel-Paxton
        # scheme of that order; the remaining known values are looked up
        # in %smearing_methods. Nothing is reported when the setting is
        # absent or its value is unknown.
        my $ismear = $settings->{ismear};
        if( !defined $ismear ) {
            # Smearing scheme is not recorded.
        } elsif( $ismear > 0 ) {
            print_single_tag_and_value(
                '_dft_BZ_integration_smearing_method',
                'Methfessel-Paxton' );
            print_single_tag_and_value(
                '_dft_BZ_integration_MP_order', $ismear );
        } elsif( exists $smearing_methods{$ismear} ) {
            if( exists $smearing_methods{$ismear}->{main} ) {
                print_single_tag_and_value(
                    '_dft_BZ_integration_smearing_method',
                    $smearing_methods{$ismear}->{main} );
            } else {
                print_single_tag_and_value(
                    '_dft_BZ_integration_smearing_method_other',
                    $smearing_methods{$ismear}->{other} );
            }
        }

        # Definedness (not truthiness) is tested below so that legitimate
        # zero values are reported as well; only missing (NULL) values
        # are skipped.
        if( defined $calculation->{band_gap} ) {
            print_single_tag_and_value( '_dft_band_gap',
                                        $calculation->{band_gap} );
        }

        if( defined $calculation->{magmom_pa} ) {
            print_single_tag_and_value( '_dft_cell_magn_moment',
                                        $calculation->{magmom_pa} );
        }

        if( defined $calculation->{delta_e} ) {
            print_single_tag_and_value( '_dft_lattice_energy',
                                        $calculation->{delta_e} *
                                        $eV_per_atom_to_J_per_mol );
        }

        my $metadata = query_OQMD_calculation_metadata( $connected_db,
                                                        $calculation->{id} );

        # Metadata entries of type 'error' are passed on as an error flag
        # with a description listing all of the recorded values.
        if( $metadata && exists $metadata->{error} ) {
            print_single_tag_and_value( '_oqmd_error_flag', 'error' );
            print_single_tag_and_value( '_oqmd_error_description',
                "\n  The structure is marked with error flag(s)\n  " .
                join( ', ', map { "\"$_\"" } @{$metadata->{error}} ) .
                "\n  in the source database." );
        }
    }

    print "loop_\n";
    print "_atom_site_label\n";
    print "_atom_site_fract_x\n";
    print "_atom_site_fract_y\n";
    print "_atom_site_fract_z\n";
    print "_atom_site_occupancy\n";
    # Atom ids are sorted numerically; the default string sort would
    # place e.g. id 10 before id 2.
    # NOTE(review): assumes the atom ids are integers -- TODO confirm.
    for my $atom_id (sort { $a <=> $b } keys %{$atoms}) {
        my $atom = $atoms->{$atom_id};
        print $atom->{element_id};
        printf " ".$atom_fformat, $atom->{x};
        printf " ".$atom_fformat, $atom->{y};
        printf " ".$atom_fformat, $atom->{z};
        print " ".$atom->{occupancy};
        print "\n";
    }
    # A loop must have at least one row of values; unknown values are
    # output if the structure has no atoms.
    if( !$coord_atom_count ) {
        print "? ? ? ? ?\n";
    }

}

database_disconnect( $connected_db );

#------------------------------------------------------------------------------

# Connects to the database described by the given hash.
#
# Parameters:
#   $database -- reference to a hash with the keys 'platform', 'host',
#                'name', 'user', 'password' and, optionally, 'port'.
# Returns:
#   a shallow copy of the input hash with the 'dbh' key set to the
#   opened database handle: a hash in the list context, a reference to
#   the hash in the scalar context.
# Dies if the connection cannot be established.
sub database_connect
{
    my ( $database ) = @_;

    my $dsn = "dbi:$database->{platform}:" .
              "hostname=$database->{host};" .
              "dbname=$database->{name}";

    # The port is added only when it was explicitly requested; otherwise
    # the choice of the port is left to the database driver.
    if( defined $database->{port} ) {
        $dsn .= ";port=$database->{port}";
    }

    # The user name and the password are passed as separate arguments
    # instead of being embedded in the data source name, where characters
    # such as ';' in a password would corrupt the name.
    my $dbh = DBI->connect( $dsn,
                            $database->{user},
                            $database->{password} )
        || die "cannot connect do the database -- $DBI::errstr";

    my %connected_db = %{$database};

    $connected_db{dbh} = $dbh;

    return wantarray ? %connected_db : \%connected_db;
}

# Closes the database connection held in the 'dbh' slot of the given
# hash and resets that slot to undef. Dies if the handle reports that
# the disconnect has failed; the slot is left untouched in that case.
sub database_disconnect
{
    my ( $database ) = @_;

    my $handle = $database->{dbh};

    if( !$handle->disconnect ) {
        die "cannot disconnect -- $DBI::errstr";
    }

    $database->{dbh} = undef;

    return;
}

# Fetches the ids of the structures, ordered by the id.
#
# Parameters:
#   $database -- connected database description (uses 'dbh' and the
#                'structures' table name);
#   $limit    -- optional text of the LIMIT clause (e.g. "10" or
#                "1000,10"), appended to the query verbatim.
# Returns:
#   a list of structure ids.
sub query_OQMD_structure_ids
{
    my ( $database, $limit ) = @_;

    my $limit_clause = '';
    if( defined $limit ) {
        $limit_clause = " LIMIT " . $limit;
    }

    my $query =
        "SELECT id FROM `$database->{tables}{structures}` ORDER BY id" .
        $limit_clause . ";";

    my $sth = $database->{dbh}->prepare( $query );
    $sth->execute();

    # Each fetched row is an array with the id as its only element.
    my @ids;
    for my $row (@{$sth->fetchall_arrayref()}) {
        push @ids, $row->[0];
    }

    return @ids;
}

# Fetches the record of a single structure: cell vector components,
# entry id, label, composition id and space group id.
#
# Parameters:
#   $database -- connected database description (uses 'dbh' and the
#                'structures' table name);
#   $id       -- id of the structure.
# Returns:
#   whatever fetchrow_hashref() yields for the first matching row.
sub query_OQMD_structure
{
    my ( $database, $id ) = @_;

    my $structures_table = $database->{tables}{structures};

    my $query =
        "SELECT id, x1, x2, x3, y1, y2, y3, z1, z2, z3,
                entry_id, label, composition_id, spacegroup_id
         FROM `$structures_table` WHERE id = ?;";

    my $sth = $database->{dbh}->prepare( $query );
    $sth->execute( $id );

    return $sth->fetchrow_hashref();
}

# Fetches the calculation that has the given structure as its output,
# left-joined with its formation energy record (the 'delta_e' column).
#
# Parameters:
#   $database -- connected database description (uses 'dbh' and the
#                'calculations' and 'formation_energies' table names);
#   $id       -- id of the output structure.
# Returns:
#   whatever fetchrow_hashref() yields for the first matching row.
sub query_OQMD_calculation
{
    my ( $database, $id ) = @_;

    my $calculations_table = $database->{tables}{calculations};
    my $energies_table     = $database->{tables}{formation_energies};

    my $query =
        "SELECT C.id, label, input_id, settings, energy_pa, magmom_pa,
                band_gap, converged, runtime, delta_e
         FROM `$calculations_table` AS C
         LEFT JOIN `$energies_table`
            ON calculation_id = C.id
         WHERE output_id = ?;";

    my $sth = $database->{dbh}->prepare( $query );
    $sth->execute( $id );

    return $sth->fetchrow_hashref();
}

# Fetches the metadata attached to a calculation.
#
# Parameters:
#   $database       -- connected database description (uses 'dbh' and
#                      the 'calculations_meta_data' and 'meta_data'
#                      table names);
#   $calculation_id -- id of the calculation.
# Returns:
#   a reference to a hash that maps each metadata type to a reference to
#   an array of its values (in the fetch order); the hash is empty if
#   the calculation has no metadata.
sub query_OQMD_calculation_metadata
{
    my ( $database, $calculation_id ) = @_;

    my $links_table    = $database->{tables}{calculations_meta_data};
    my $metadata_table = $database->{tables}{meta_data};

    my $query =
        "SELECT type, value
         FROM `$links_table` AS CMD
         JOIN `$metadata_table` AS MD
            ON CMD.metadata_id = MD.id
         WHERE CMD.calculation_id = ?;";

    my $sth = $database->{dbh}->prepare( $query );
    $sth->execute( $calculation_id );

    # Values are grouped by their type; the per-type arrays are created
    # on the first push.
    my %values_of_type;
    while( my $record = $sth->fetchrow_hashref() ) {
        push @{ $values_of_type{ $record->{type} } }, $record->{value};
    }

    return \%values_of_type;
}

# Fetches the description of a space group (number, Hermann-Mauguin,
# Hall and Schoenflies symbols and the lattice system).
#
# Parameters:
#   $database          -- connected database description (uses 'dbh'
#                         and the 'spacegroups' table name);
#   $spacegroup_number -- value matched against the 'number' column.
# Returns:
#   whatever fetchrow_hashref() yields for the first matching row.
sub query_OQMD_spacegroup
{
    my ( $database, $spacegroup_number ) = @_;

    my $spacegroups_table = $database->{tables}{spacegroups};

    my $query =
        "SELECT number, hm, hall, schoenflies, lattice_system
         FROM `$spacegroups_table` WHERE number = ?;";

    my $sth = $database->{dbh}->prepare( $query );
    $sth->execute( $spacegroup_number );

    return $sth->fetchrow_hashref();
}

# Fetches all atoms of a structure together with the data of their sites
# and Wyckoff sites.
#
# Parameters:
#   $database -- connected database description (uses 'dbh' and the
#                'atoms', 'sites' and 'wyckoffsites' table names);
#   $id       -- id of the structure.
# Returns:
#   the result of fetchall_hashref() keyed by the 'atom_id' column, i.e.
#   one record per atom id.
#
# NOTE(review): the column list qualifies the columns with the literal
# names 'atoms', 'sites' and 'wyckoffsites', whereas the FROM/JOIN
# clauses use the configurable table names; the query presumably works
# only while these coincide -- confirm before renaming the tables.
sub query_OQMD_atoms
{
    my ( $database, $id ) = @_;

    my $dbh = $database->{dbh};

    # The 'wy' column must be taken from wyckoffsites.y; it used to be
    # selected from wyckoffsites.x, duplicating the 'wx' column.
    my $sth = $dbh->prepare(
        "SELECT atoms.id as atom_id, element_id,
                atoms.x as x, atoms.y as y, atoms.z as z,
                sites.x as sx, sites.y as sy, sites.z as sz,
                fx, fy, fz, magmom, charge, occupancy,
                symbol, multiplicity, spacegroup_id,
                wyckoffsites.x as wx,
                wyckoffsites.y as wy,
                wyckoffsites.z as wz
         FROM `$database->{tables}{atoms}`
         JOIN `$database->{tables}{sites}`
             ON site_id = `$database->{tables}{sites}`.id
         JOIN `$database->{tables}{wyckoffsites}`
             ON `$database->{tables}{sites}`.wyckoff_id =
                `$database->{tables}{wyckoffsites}`.id
         WHERE `$database->{tables}{atoms}`.structure_id = ?;"
        );

    $sth->execute( $id );

    return $sth->fetchall_hashref( "atom_id" );
}
