Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
Molstar
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Michal Malý
Molstar
Commits
f1a4053c
Commit
f1a4053c
authored
7 years ago
by
Alexander Rose
Browse files
Options
Downloads
Patches
Plain Diff
wip, combine-mmcif script
parent
75c68689
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/apps/combine-mmcif/index.ts
+162
-48
162 additions, 48 deletions
src/apps/combine-mmcif/index.ts
with
162 additions
and
48 deletions
src/apps/combine-mmcif/index.ts
+
162
−
48
View file @
f1a4053c
...
...
@@ -7,17 +7,20 @@
import
*
as
argparse
from
'
argparse
'
import
*
as
util
from
'
util
'
import
*
as
fs
from
'
fs
'
import
*
as
zlib
from
'
zlib
'
import
fetch
from
'
node-fetch
'
require
(
'
util.promisify
'
).
shim
();
const
readFileAsync
=
util
.
promisify
(
fs
.
readFile
);
const
readFile
=
util
.
promisify
(
fs
.
readFile
);
const
writeFile
=
util
.
promisify
(
fs
.
writeFile
);
import
{
Database
Collection
,
Database
,
Table
}
from
'
mol-data/db
'
import
{
Database
,
Table
,
DatabaseCollection
}
from
'
mol-data/db
'
import
CIF
from
'
mol-io/reader/cif
'
// import { CCD_Schema } from 'mol-io/reader/cif/schema/ccd'
import
*
as
Encoder
from
'
mol-io/writer/cif
'
import
Computation
from
'
mol-util/computation
'
import
{
mmCIF_Schema
}
from
'
mol-io/reader/cif/schema/mmcif
'
;
import
{
mmCIF_Schema
,
mmCIF_Database
}
from
'
mol-io/reader/cif/schema/mmcif
'
;
import
{
CCD_Schema
}
from
'
mol-io/reader/cif/schema/ccd
'
;
import
{
BIRD_Schema
}
from
'
mol-io/reader/cif/schema/bird
'
;
// import { Table } from 'mol-io/reader/csv/data-model';
// import { Model, Structure } from 'mol-model/structure'
...
...
@@ -30,7 +33,11 @@ export async function ensureAvailable(path: string, url: string) {
if
(
!
fs
.
existsSync
(
DATA_DIR
))
{
fs
.
mkdirSync
(
DATA_DIR
);
}
fs
.
writeFileSync
(
path
,
await
data
.
text
())
if
(
url
.
endsWith
(
'
.gz
'
))
{
await
writeFile
(
path
,
zlib
.
gunzipSync
(
await
data
.
buffer
()))
}
else
{
await
writeFile
(
path
,
await
data
.
text
())
}
console
.
log
(
`done downloading
${
url
}
`
)
}
}
...
...
@@ -38,30 +45,28 @@ export async function ensureAvailable(path: string, url: string) {
export
async
function
ensureDataAvailable
()
{
await
ensureAvailable
(
CCD_PATH
,
CCD_URL
)
await
ensureAvailable
(
PVCD_PATH
,
PVCD_URL
)
await
ensureAvailable
(
BIRD_PATH
,
BIRD_URL
)
}
function
showProgress
(
tag
:
string
,
p
:
Computation
.
Progress
)
{
console
.
log
(
`[
${
tag
}
]
${
p
.
message
}
${
p
.
isIndeterminate
?
''
:
(
p
.
current
/
p
.
max
*
100
).
toFixed
(
2
)
+
'
%
'
}
(
${
p
.
elapsedMs
|
0
}
ms)`
)
}
export
async
function
readCCD
(
bcif
=
false
)
{
const
parsed
=
await
parseCif
(
await
readFileAsync
(
CCD_PATH
,
'
utf8
'
))
const
ccd
:
DatabaseCollection
<
CCD_Schema
>
=
{}
for
(
const
data
of
parsed
.
result
.
blocks
)
{
// console.log(data.header)
ccd
[
data
.
header
]
=
CIF
.
schema
.
CCD
(
data
)
}
return
ccd
;
export
async
function
readFileAsCollection
<
S
extends
Database
.
Schema
>
(
path
:
string
,
schema
:
S
)
{
const
parsed
=
await
parseCif
(
await
readFile
(
path
,
'
utf8
'
))
return
CIF
.
toDatabaseCollection
(
schema
,
parsed
.
result
)
}
export
async
function
readPVCD
(
bcif
=
false
)
{
const
parsed
=
await
parseCif
(
await
readFileAsync
(
PVCD_PATH
,
'
utf8
'
))
const
ccd
:
DatabaseCollection
<
CCD_Schema
>
=
{}
for
(
const
data
of
parsed
.
result
.
blocks
)
{
// console.log(data.header)
ccd
[
data
.
header
]
=
CIF
.
schema
.
CCD
(
data
)
}
return
ccd
;
export
async
function
readCCD
()
{
return
readFileAsCollection
(
CCD_PATH
,
CCD_Schema
)
}
export
async
function
readPVCD
()
{
return
readFileAsCollection
(
PVCD_PATH
,
CCD_Schema
)
}
export
async
function
readBIRD
()
{
return
readFileAsCollection
(
BIRD_PATH
,
BIRD_Schema
)
}
export
async
function
getCCD
()
{
...
...
@@ -69,6 +74,11 @@ export async function getCCD() {
return
readPVCD
()
}
export
async
function
getBIRD
()
{
await
ensureDataAvailable
()
return
readBIRD
()
}
async
function
parseCif
(
data
:
string
|
Uint8Array
)
{
const
comp
=
CIF
.
parse
(
data
)
const
ctx
=
Computation
.
observable
({
...
...
@@ -97,53 +107,157 @@ export async function getPdb(pdb: string) {
return
CIF
.
schema
.
mmCIF
(
parsed
.
result
.
blocks
[
0
])
}
type
extraTables
=
{
chem_comp_bond
:
Table
<
mmCIF_Schema
[
'
chem_comp_bond
'
]
>
,
pdbx_reference_entity_list
:
Table
<
mmCIF_Schema
[
'
pdbx_reference_entity_list
'
]
>
,
pdbx_reference_entity_link
:
Table
<
mmCIF_Schema
[
'
pdbx_reference_entity_link
'
]
>
,
pdbx_reference_entity_poly_link
:
Table
<
mmCIF_Schema
[
'
pdbx_reference_entity_poly_link
'
]
>
,
}
type
extraTablesLists
=
{
[
k
in
keyof
extraTables
]:
extraTables
[
k
][]
}
export
function
getExtraTables
(
mmcif
:
mmCIF_Database
,
ccd
:
DatabaseCollection
<
CCD_Schema
>
,
bird
:
DatabaseCollection
<
BIRD_Schema
>
)
{
const
extraTablesLists
:
extraTablesLists
=
{
chem_comp_bond
:
[],
pdbx_reference_entity_list
:
[],
pdbx_reference_entity_link
:
[],
pdbx_reference_entity_poly_link
:
[]
}
for
(
let
i
=
0
,
n
=
mmcif
.
chem_comp
.
_rowCount
;
i
<
n
;
++
i
)
{
const
ccdId
=
mmcif
.
chem_comp
.
id
.
value
(
i
);
if
(
ccdId
in
ccd
)
{
extraTablesLists
.
chem_comp_bond
.
push
(
ccd
[
ccdId
].
chem_comp_bond
)
}
else
{
console
.
error
(
`ccdId
${
ccdId
}
not found`
)
}
}
for
(
let
i
=
0
,
n
=
mmcif
.
pdbx_molecule_features
.
_rowCount
;
i
<
n
;
++
i
)
{
const
birdId
=
mmcif
.
pdbx_molecule_features
.
prd_id
.
value
(
i
);
if
(
birdId
in
bird
)
{
const
e
=
bird
[
birdId
]
extraTablesLists
.
pdbx_reference_entity_list
.
push
(
e
.
pdbx_reference_entity_list
)
extraTablesLists
.
pdbx_reference_entity_link
.
push
(
e
.
pdbx_reference_entity_link
)
extraTablesLists
.
pdbx_reference_entity_poly_link
.
push
(
e
.
pdbx_reference_entity_poly_link
)
}
else
{
console
.
error
(
`birdId
${
birdId
}
not found`
)
}
}
const
extraTables
:
extraTables
=
Object
.
assign
({},
...
Object
.
keys
(
extraTablesLists
).
map
(
k
=>
{
// TODO how to avoid type casting?
return
{
[
k
]:
Table
.
concat
((
extraTablesLists
as
any
)[
k
],
(
mmCIF_Schema
as
any
)[
k
])
}
}))
return
extraTables
}
type
PartialStructConnRow
=
Partial
<
Table
.
Row
<
mmCIF_Schema
[
'
struct_conn
'
]
>>
export
function
getStructConnValueOrder
(
value
:
any
):
mmCIF_Schema
[
'
struct_conn
'
][
'
pdbx_value_order
'
][
'
T
'
]
{
return
mmCIF_Schema
.
struct_conn
.
pdbx_value_order
[
'
T
'
].
includes
(
value
)
?
value
:
'
sing
'
}
export
function
getBirdBonds
(
mmcif
:
mmCIF_Database
)
{
const
bonds
:
PartialStructConnRow
[]
=
[]
const
mol
=
mmcif
.
pdbx_molecule
const
molFeat
=
mmcif
.
pdbx_molecule_features
for
(
let
i
=
0
,
n
=
molFeat
.
_rowCount
;
i
<
n
;
++
i
)
{
// console.log(Table.getRow(molFeat, i))
const
instancesAsymIdList
:
{
[
k
:
number
]:
string
[]
}
=
{}
for
(
let
j
=
0
,
m
=
mol
.
_rowCount
;
j
<
m
;
++
j
)
{
if
(
mol
.
prd_id
.
value
(
j
)
===
molFeat
.
prd_id
.
value
(
i
))
{
// console.log(Table.getRow(mol, j))
const
instanceId
=
mol
.
instance_id
.
value
(
j
)
if
(
instancesAsymIdList
[
instanceId
]
===
undefined
)
{
instancesAsymIdList
[
instanceId
]
=
[]
}
instancesAsymIdList
[
instanceId
].
push
(
mol
.
asym_id
.
value
(
j
))
}
}
// console.log(instancesAsymIdList)
const
entityLink
=
mmcif
.
pdbx_reference_entity_link
for
(
const
instanceId
of
Object
.
keys
(
instancesAsymIdList
))
{
const
asymIdList
=
instancesAsymIdList
[
instanceId
as
any
]
for
(
let
j
=
0
,
m
=
entityLink
.
_rowCount
;
j
<
m
;
++
j
)
{
if
(
entityLink
.
prd_id
.
value
(
j
)
===
molFeat
.
prd_id
.
value
(
i
))
{
// console.log(Table.getRow(entityLink, j))
const
link
:
PartialStructConnRow
=
{
ptnr1_label_asym_id
:
asymIdList
[
entityLink
.
component_1
.
value
(
j
)
-
1
],
ptnr1_label_atom_id
:
entityLink
.
atom_id_1
.
value
(
j
),
ptnr1_label_comp_id
:
entityLink
.
comp_id_1
.
value
(
j
),
ptnr1_label_seq_id
:
entityLink
.
entity_seq_num_1
.
value
(
j
),
ptnr2_label_asym_id
:
asymIdList
[
entityLink
.
component_2
.
value
(
j
)
-
1
],
ptnr2_label_atom_id
:
entityLink
.
atom_id_2
.
value
(
j
),
ptnr2_label_comp_id
:
entityLink
.
comp_id_2
.
value
(
j
),
ptnr2_label_seq_id
:
entityLink
.
entity_seq_num_2
.
value
(
j
),
pdbx_value_order
:
getStructConnValueOrder
(
entityLink
.
value_order
.
value
(
j
)),
}
// console.log(link)
bonds
.
push
(
link
)
}
}
}
}
return
bonds
}
export
function
getCcdBonds
(
mmcif
:
mmCIF_Database
)
{
const
bonds
:
PartialStructConnRow
[]
=
[]
}
async
function
run
(
pdb
:
string
,
out
?:
string
)
{
const
ccd
=
await
getCCD
()
//
cons
ole.log(Object.keys(ccd).length
)
cons
t
bird
=
await
getBIRD
(
)
const
mmcif
=
await
getPdb
(
pdb
)
// console.log(mmcif.chem_comp.id.toArray())
// const chemCompBond_Schema = { chem_comp_bond: mmCIF_Schema.chem_comp_bond }
// type chemCompBond_Schema = typeof chemCompBond_Schema;
// type chemCompBond_Database = Database<chemCompBond_Schema>;
// interface chemCompBond_Database extends Database<chemCompBond_Schema> {}
for
(
const
k
of
Object
.
keys
(
bird
))
{
const
entity
=
bird
[
k
].
pdbx_reference_entity_list
for
(
let
i
=
0
,
n
=
entity
.
_rowCount
;
i
<
n
;
++
i
)
{
if
(
entity
.
ref_entity_id
.
value
(
i
)
!==
entity
.
component_id
.
value
(
i
).
toString
())
{
console
.
log
(
Table
.
getRow
(
entity
,
i
))
}
}
const
chemCompBondTables
:
Table
<
mmCIF_Schema
[
'
chem_comp_bond
'
]
>
[]
=
[]
const
link
=
bird
[
k
].
pdbx_reference_entity_link
for
(
let
i
=
0
,
n
=
link
.
_rowCount
;
i
<
n
;
++
i
)
{
if
(
link
.
value_order
.
value
(
i
)
!==
'
sing
'
)
{
console
.
log
(
Table
.
getRow
(
link
,
i
))
}
}
for
(
let
i
=
0
,
n
=
mmcif
.
chem_comp
.
_rowCount
;
i
<
n
;
++
i
)
{
const
ccdId
=
mmcif
.
chem_comp
.
id
.
value
(
i
);
// console.log(ccdId)
if
(
ccdId
in
ccd
)
{
// console.log(`ccdId ${ccdId} has ${ccd[ccdId].chem_comp_atom._rowCount} atoms`)
// console.log(ccd[ccdId].chem_comp_bond.atom_id_1.toArray())
chemCompBondTables
.
push
(
ccd
[
ccdId
].
chem_comp_bond
)
}
else
{
console
.
error
(
`ccdId
${
ccdId
}
not found`
)
const
polyLink
=
bird
[
k
].
pdbx_reference_entity_poly_link
for
(
let
i
=
0
,
n
=
link
.
_rowCount
;
i
<
n
;
++
i
)
{
if
(
polyLink
.
value_order
.
value
(
i
)
!==
'
sing
'
)
{
console
.
log
(
Table
.
getRow
(
polyLink
,
i
))
}
}
}
// for (const k of Object.keys(ccd)) {
// console.log(k)
// }
const
extraTables
=
getExtraTables
(
mmcif
,
ccd
,
bird
)
const
combinedMmcif
=
Database
.
ofTables
(
'
mmcif_combined
'
,
mmCIF_Schema
,
Object
.
assign
({},
mmcif
,
extraTables
))
// console.log(getEncodedCif(pdb, combinedMmcif))
// console.log(Database.getTablesAsRows(combinedMmcif))
// const combinedChemCompBonds = Database.ofTables('chemCompBond', chemCompBond_Schema, {
// chem_comp_bond: Table.concat(chemCompBondTables, mmCIF_Schema.chem_comp_bond)
// })
// console.log('concat done')
// console.log(getEncodedCif('chemCompBond', combinedChemCompBonds))
const
combinedMmcif
=
Database
.
ofTables
(
'
chemCompBond
'
,
mmCIF_Schema
,
Object
.
assign
({},
mmcif
,
{
chem_comp_bond
:
Table
.
concat
(
chemCompBondTables
,
mmCIF_Schema
.
chem_comp_bond
)
}
))
console
.
log
(
getEncodedCif
(
pdb
,
combinedMmcif
))
// console.log(getBirdBonds(combinedMmcif))
console
.
log
(
getCcdBonds
(
combinedMmcif
))
}
const
DATA_DIR
=
'
./build/data
'
const
CCD_PATH
=
`
${
DATA_DIR
}
/components.cif`
const
PVCD_PATH
=
`
${
DATA_DIR
}
/aa-variants-v1.cif`
const
BIRD_PATH
=
`
${
DATA_DIR
}
/prd-all.cif`
const
CCD_URL
=
'
http://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
'
const
PVCD_URL
=
'
http://ftp.wwpdb.org/pub/pdb/data/monomers/aa-variants-v1.cif
'
const
BIRD_URL
=
'
http://ftp.wwpdb.org/pub/pdb/data/bird/prd/prd-all.cif.gz
'
const
parser
=
new
argparse
.
ArgumentParser
({
addHelp
:
true
,
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment