Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
Molstar
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Michal Malý
Molstar
Commits
f1a4053c
Commit
f1a4053c
authored
7 years ago
by
Alexander Rose
Browse files
Options
Downloads
Patches
Plain Diff
wip, combine-mmcif script
parent
75c68689
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/apps/combine-mmcif/index.ts
+162
-48
162 additions, 48 deletions
src/apps/combine-mmcif/index.ts
with
162 additions
and
48 deletions
src/apps/combine-mmcif/index.ts
+
162
−
48
View file @
f1a4053c
...
@@ -7,17 +7,20 @@
...
@@ -7,17 +7,20 @@
import
*
as
argparse
from
'
argparse
'
import
*
as
argparse
from
'
argparse
'
import
*
as
util
from
'
util
'
import
*
as
util
from
'
util
'
import
*
as
fs
from
'
fs
'
import
*
as
fs
from
'
fs
'
import
*
as
zlib
from
'
zlib
'
import
fetch
from
'
node-fetch
'
import
fetch
from
'
node-fetch
'
require
(
'
util.promisify
'
).
shim
();
require
(
'
util.promisify
'
).
shim
();
const
readFileAsync
=
util
.
promisify
(
fs
.
readFile
);
const
readFile
=
util
.
promisify
(
fs
.
readFile
);
const
writeFile
=
util
.
promisify
(
fs
.
writeFile
);
import
{
Database
Collection
,
Database
,
Table
}
from
'
mol-data/db
'
import
{
Database
,
Table
,
DatabaseCollection
}
from
'
mol-data/db
'
import
CIF
from
'
mol-io/reader/cif
'
import
CIF
from
'
mol-io/reader/cif
'
// import { CCD_Schema } from 'mol-io/reader/cif/schema/ccd'
// import { CCD_Schema } from 'mol-io/reader/cif/schema/ccd'
import
*
as
Encoder
from
'
mol-io/writer/cif
'
import
*
as
Encoder
from
'
mol-io/writer/cif
'
import
Computation
from
'
mol-util/computation
'
import
Computation
from
'
mol-util/computation
'
import
{
mmCIF_Schema
}
from
'
mol-io/reader/cif/schema/mmcif
'
;
import
{
mmCIF_Schema
,
mmCIF_Database
}
from
'
mol-io/reader/cif/schema/mmcif
'
;
import
{
CCD_Schema
}
from
'
mol-io/reader/cif/schema/ccd
'
;
import
{
CCD_Schema
}
from
'
mol-io/reader/cif/schema/ccd
'
;
import
{
BIRD_Schema
}
from
'
mol-io/reader/cif/schema/bird
'
;
// import { Table } from 'mol-io/reader/csv/data-model';
// import { Table } from 'mol-io/reader/csv/data-model';
// import { Model, Structure } from 'mol-model/structure'
// import { Model, Structure } from 'mol-model/structure'
...
@@ -30,7 +33,11 @@ export async function ensureAvailable(path: string, url: string) {
...
@@ -30,7 +33,11 @@ export async function ensureAvailable(path: string, url: string) {
if
(
!
fs
.
existsSync
(
DATA_DIR
))
{
if
(
!
fs
.
existsSync
(
DATA_DIR
))
{
fs
.
mkdirSync
(
DATA_DIR
);
fs
.
mkdirSync
(
DATA_DIR
);
}
}
fs
.
writeFileSync
(
path
,
await
data
.
text
())
if
(
url
.
endsWith
(
'
.gz
'
))
{
await
writeFile
(
path
,
zlib
.
gunzipSync
(
await
data
.
buffer
()))
}
else
{
await
writeFile
(
path
,
await
data
.
text
())
}
console
.
log
(
`done downloading
${
url
}
`
)
console
.
log
(
`done downloading
${
url
}
`
)
}
}
}
}
...
@@ -38,30 +45,28 @@ export async function ensureAvailable(path: string, url: string) {
...
@@ -38,30 +45,28 @@ export async function ensureAvailable(path: string, url: string) {
export
async
function
ensureDataAvailable
()
{
export
async
function
ensureDataAvailable
()
{
await
ensureAvailable
(
CCD_PATH
,
CCD_URL
)
await
ensureAvailable
(
CCD_PATH
,
CCD_URL
)
await
ensureAvailable
(
PVCD_PATH
,
PVCD_URL
)
await
ensureAvailable
(
PVCD_PATH
,
PVCD_URL
)
await
ensureAvailable
(
BIRD_PATH
,
BIRD_URL
)
}
}
function
showProgress
(
tag
:
string
,
p
:
Computation
.
Progress
)
{
function
showProgress
(
tag
:
string
,
p
:
Computation
.
Progress
)
{
console
.
log
(
`[
${
tag
}
]
${
p
.
message
}
${
p
.
isIndeterminate
?
''
:
(
p
.
current
/
p
.
max
*
100
).
toFixed
(
2
)
+
'
%
'
}
(
${
p
.
elapsedMs
|
0
}
ms)`
)
console
.
log
(
`[
${
tag
}
]
${
p
.
message
}
${
p
.
isIndeterminate
?
''
:
(
p
.
current
/
p
.
max
*
100
).
toFixed
(
2
)
+
'
%
'
}
(
${
p
.
elapsedMs
|
0
}
ms)`
)
}
}
export
async
function
readCCD
(
bcif
=
false
)
{
export
async
function
readFileAsCollection
<
S
extends
Database
.
Schema
>
(
path
:
string
,
schema
:
S
)
{
const
parsed
=
await
parseCif
(
await
readFileAsync
(
CCD_PATH
,
'
utf8
'
))
const
parsed
=
await
parseCif
(
await
readFile
(
path
,
'
utf8
'
))
const
ccd
:
DatabaseCollection
<
CCD_Schema
>
=
{}
return
CIF
.
toDatabaseCollection
(
schema
,
parsed
.
result
)
for
(
const
data
of
parsed
.
result
.
blocks
)
{
// console.log(data.header)
ccd
[
data
.
header
]
=
CIF
.
schema
.
CCD
(
data
)
}
}
return
ccd
;
export
async
function
readCCD
()
{
return
readFileAsCollection
(
CCD_PATH
,
CCD_Schema
)
}
}
export
async
function
readPVCD
(
bcif
=
false
)
{
export
async
function
readPVCD
()
{
const
parsed
=
await
parseCif
(
await
readFileAsync
(
PVCD_PATH
,
'
utf8
'
))
return
readFileAsCollection
(
PVCD_PATH
,
CCD_Schema
)
const
ccd
:
DatabaseCollection
<
CCD_Schema
>
=
{}
for
(
const
data
of
parsed
.
result
.
blocks
)
{
// console.log(data.header)
ccd
[
data
.
header
]
=
CIF
.
schema
.
CCD
(
data
)
}
}
return
ccd
;
export
async
function
readBIRD
()
{
return
readFileAsCollection
(
BIRD_PATH
,
BIRD_Schema
)
}
}
export
async
function
getCCD
()
{
export
async
function
getCCD
()
{
...
@@ -69,6 +74,11 @@ export async function getCCD() {
...
@@ -69,6 +74,11 @@ export async function getCCD() {
return
readPVCD
()
return
readPVCD
()
}
}
export
async
function
getBIRD
()
{
await
ensureDataAvailable
()
return
readBIRD
()
}
async
function
parseCif
(
data
:
string
|
Uint8Array
)
{
async
function
parseCif
(
data
:
string
|
Uint8Array
)
{
const
comp
=
CIF
.
parse
(
data
)
const
comp
=
CIF
.
parse
(
data
)
const
ctx
=
Computation
.
observable
({
const
ctx
=
Computation
.
observable
({
...
@@ -97,53 +107,157 @@ export async function getPdb(pdb: string) {
...
@@ -97,53 +107,157 @@ export async function getPdb(pdb: string) {
return
CIF
.
schema
.
mmCIF
(
parsed
.
result
.
blocks
[
0
])
return
CIF
.
schema
.
mmCIF
(
parsed
.
result
.
blocks
[
0
])
}
}
type
extraTables
=
{
chem_comp_bond
:
Table
<
mmCIF_Schema
[
'
chem_comp_bond
'
]
>
,
pdbx_reference_entity_list
:
Table
<
mmCIF_Schema
[
'
pdbx_reference_entity_list
'
]
>
,
pdbx_reference_entity_link
:
Table
<
mmCIF_Schema
[
'
pdbx_reference_entity_link
'
]
>
,
pdbx_reference_entity_poly_link
:
Table
<
mmCIF_Schema
[
'
pdbx_reference_entity_poly_link
'
]
>
,
}
type
extraTablesLists
=
{
[
k
in
keyof
extraTables
]:
extraTables
[
k
][]
}
export
function
getExtraTables
(
mmcif
:
mmCIF_Database
,
ccd
:
DatabaseCollection
<
CCD_Schema
>
,
bird
:
DatabaseCollection
<
BIRD_Schema
>
)
{
const
extraTablesLists
:
extraTablesLists
=
{
chem_comp_bond
:
[],
pdbx_reference_entity_list
:
[],
pdbx_reference_entity_link
:
[],
pdbx_reference_entity_poly_link
:
[]
}
for
(
let
i
=
0
,
n
=
mmcif
.
chem_comp
.
_rowCount
;
i
<
n
;
++
i
)
{
const
ccdId
=
mmcif
.
chem_comp
.
id
.
value
(
i
);
if
(
ccdId
in
ccd
)
{
extraTablesLists
.
chem_comp_bond
.
push
(
ccd
[
ccdId
].
chem_comp_bond
)
}
else
{
console
.
error
(
`ccdId
${
ccdId
}
not found`
)
}
}
for
(
let
i
=
0
,
n
=
mmcif
.
pdbx_molecule_features
.
_rowCount
;
i
<
n
;
++
i
)
{
const
birdId
=
mmcif
.
pdbx_molecule_features
.
prd_id
.
value
(
i
);
if
(
birdId
in
bird
)
{
const
e
=
bird
[
birdId
]
extraTablesLists
.
pdbx_reference_entity_list
.
push
(
e
.
pdbx_reference_entity_list
)
extraTablesLists
.
pdbx_reference_entity_link
.
push
(
e
.
pdbx_reference_entity_link
)
extraTablesLists
.
pdbx_reference_entity_poly_link
.
push
(
e
.
pdbx_reference_entity_poly_link
)
}
else
{
console
.
error
(
`birdId
${
birdId
}
not found`
)
}
}
const
extraTables
:
extraTables
=
Object
.
assign
({},
...
Object
.
keys
(
extraTablesLists
).
map
(
k
=>
{
// TODO how to avoid type casting?
return
{
[
k
]:
Table
.
concat
((
extraTablesLists
as
any
)[
k
],
(
mmCIF_Schema
as
any
)[
k
])
}
}))
return
extraTables
}
type
PartialStructConnRow
=
Partial
<
Table
.
Row
<
mmCIF_Schema
[
'
struct_conn
'
]
>>
export
function
getStructConnValueOrder
(
value
:
any
):
mmCIF_Schema
[
'
struct_conn
'
][
'
pdbx_value_order
'
][
'
T
'
]
{
return
mmCIF_Schema
.
struct_conn
.
pdbx_value_order
[
'
T
'
].
includes
(
value
)
?
value
:
'
sing
'
}
export
function
getBirdBonds
(
mmcif
:
mmCIF_Database
)
{
const
bonds
:
PartialStructConnRow
[]
=
[]
const
mol
=
mmcif
.
pdbx_molecule
const
molFeat
=
mmcif
.
pdbx_molecule_features
for
(
let
i
=
0
,
n
=
molFeat
.
_rowCount
;
i
<
n
;
++
i
)
{
// console.log(Table.getRow(molFeat, i))
const
instancesAsymIdList
:
{
[
k
:
number
]:
string
[]
}
=
{}
for
(
let
j
=
0
,
m
=
mol
.
_rowCount
;
j
<
m
;
++
j
)
{
if
(
mol
.
prd_id
.
value
(
j
)
===
molFeat
.
prd_id
.
value
(
i
))
{
// console.log(Table.getRow(mol, j))
const
instanceId
=
mol
.
instance_id
.
value
(
j
)
if
(
instancesAsymIdList
[
instanceId
]
===
undefined
)
{
instancesAsymIdList
[
instanceId
]
=
[]
}
instancesAsymIdList
[
instanceId
].
push
(
mol
.
asym_id
.
value
(
j
))
}
}
// console.log(instancesAsymIdList)
const
entityLink
=
mmcif
.
pdbx_reference_entity_link
for
(
const
instanceId
of
Object
.
keys
(
instancesAsymIdList
))
{
const
asymIdList
=
instancesAsymIdList
[
instanceId
as
any
]
for
(
let
j
=
0
,
m
=
entityLink
.
_rowCount
;
j
<
m
;
++
j
)
{
if
(
entityLink
.
prd_id
.
value
(
j
)
===
molFeat
.
prd_id
.
value
(
i
))
{
// console.log(Table.getRow(entityLink, j))
const
link
:
PartialStructConnRow
=
{
ptnr1_label_asym_id
:
asymIdList
[
entityLink
.
component_1
.
value
(
j
)
-
1
],
ptnr1_label_atom_id
:
entityLink
.
atom_id_1
.
value
(
j
),
ptnr1_label_comp_id
:
entityLink
.
comp_id_1
.
value
(
j
),
ptnr1_label_seq_id
:
entityLink
.
entity_seq_num_1
.
value
(
j
),
ptnr2_label_asym_id
:
asymIdList
[
entityLink
.
component_2
.
value
(
j
)
-
1
],
ptnr2_label_atom_id
:
entityLink
.
atom_id_2
.
value
(
j
),
ptnr2_label_comp_id
:
entityLink
.
comp_id_2
.
value
(
j
),
ptnr2_label_seq_id
:
entityLink
.
entity_seq_num_2
.
value
(
j
),
pdbx_value_order
:
getStructConnValueOrder
(
entityLink
.
value_order
.
value
(
j
)),
}
// console.log(link)
bonds
.
push
(
link
)
}
}
}
}
return
bonds
}
export
function
getCcdBonds
(
mmcif
:
mmCIF_Database
)
{
const
bonds
:
PartialStructConnRow
[]
=
[]
}
async
function
run
(
pdb
:
string
,
out
?:
string
)
{
async
function
run
(
pdb
:
string
,
out
?:
string
)
{
const
ccd
=
await
getCCD
()
const
ccd
=
await
getCCD
()
//
cons
ole.log(Object.keys(ccd).length
)
cons
t
bird
=
await
getBIRD
(
)
const
mmcif
=
await
getPdb
(
pdb
)
const
mmcif
=
await
getPdb
(
pdb
)
// console.log(mmcif.chem_comp.id.toArray())
// console.log(mmcif.chem_comp.id.toArray())
// const chemCompBond_Schema = { chem_comp_bond: mmCIF_Schema.chem_comp_bond }
for
(
const
k
of
Object
.
keys
(
bird
))
{
// type chemCompBond_Schema = typeof chemCompBond_Schema;
const
entity
=
bird
[
k
].
pdbx_reference_entity_list
// type chemCompBond_Database = Database<chemCompBond_Schema>;
for
(
let
i
=
0
,
n
=
entity
.
_rowCount
;
i
<
n
;
++
i
)
{
// interface chemCompBond_Database extends Database<chemCompBond_Schema> {}
if
(
entity
.
ref_entity_id
.
value
(
i
)
!==
entity
.
component_id
.
value
(
i
).
toString
())
{
console
.
log
(
Table
.
getRow
(
entity
,
i
))
}
}
const
chemCompBondTables
:
Table
<
mmCIF_Schema
[
'
chem_comp_bond
'
]
>
[]
=
[]
const
link
=
bird
[
k
].
pdbx_reference_entity_link
for
(
let
i
=
0
,
n
=
link
.
_rowCount
;
i
<
n
;
++
i
)
{
if
(
link
.
value_order
.
value
(
i
)
!==
'
sing
'
)
{
console
.
log
(
Table
.
getRow
(
link
,
i
))
}
}
for
(
let
i
=
0
,
n
=
mmcif
.
chem_comp
.
_rowCount
;
i
<
n
;
++
i
)
{
const
polyLink
=
bird
[
k
].
pdbx_reference_entity_poly_link
const
ccdId
=
mmcif
.
chem_comp
.
id
.
value
(
i
);
for
(
let
i
=
0
,
n
=
link
.
_rowCount
;
i
<
n
;
++
i
)
{
// console.log(ccdId)
if
(
polyLink
.
value_order
.
value
(
i
)
!==
'
sing
'
)
{
if
(
ccdId
in
ccd
)
{
console
.
log
(
Table
.
getRow
(
polyLink
,
i
))
// console.log(`ccdId ${ccdId} has ${ccd[ccdId].chem_comp_atom._rowCount} atoms`)
}
// console.log(ccd[ccdId].chem_comp_bond.atom_id_1.toArray())
chemCompBondTables
.
push
(
ccd
[
ccdId
].
chem_comp_bond
)
}
else
{
console
.
error
(
`ccdId
${
ccdId
}
not found`
)
}
}
}
}
// for (const k of Object.keys(ccd)) {
const
extraTables
=
getExtraTables
(
mmcif
,
ccd
,
bird
)
// console.log(k)
const
combinedMmcif
=
Database
.
ofTables
(
'
mmcif_combined
'
,
mmCIF_Schema
,
Object
.
assign
({},
mmcif
,
extraTables
))
// }
// console.log(getEncodedCif(pdb, combinedMmcif))
// console.log(Database.getTablesAsRows(combinedMmcif))
// const combinedChemCompBonds = Database.ofTables('chemCompBond', chemCompBond_Schema, {
// console.log(getBirdBonds(combinedMmcif))
// chem_comp_bond: Table.concat(chemCompBondTables, mmCIF_Schema.chem_comp_bond)
console
.
log
(
getCcdBonds
(
combinedMmcif
))
// })
// console.log('concat done')
// console.log(getEncodedCif('chemCompBond', combinedChemCompBonds))
const
combinedMmcif
=
Database
.
ofTables
(
'
chemCompBond
'
,
mmCIF_Schema
,
Object
.
assign
({},
mmcif
,
{
chem_comp_bond
:
Table
.
concat
(
chemCompBondTables
,
mmCIF_Schema
.
chem_comp_bond
)
}
))
console
.
log
(
getEncodedCif
(
pdb
,
combinedMmcif
))
}
}
const
DATA_DIR
=
'
./build/data
'
const
DATA_DIR
=
'
./build/data
'
const
CCD_PATH
=
`
${
DATA_DIR
}
/components.cif`
const
CCD_PATH
=
`
${
DATA_DIR
}
/components.cif`
const
PVCD_PATH
=
`
${
DATA_DIR
}
/aa-variants-v1.cif`
const
PVCD_PATH
=
`
${
DATA_DIR
}
/aa-variants-v1.cif`
const
BIRD_PATH
=
`
${
DATA_DIR
}
/prd-all.cif`
const
CCD_URL
=
'
http://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
'
const
CCD_URL
=
'
http://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
'
const
PVCD_URL
=
'
http://ftp.wwpdb.org/pub/pdb/data/monomers/aa-variants-v1.cif
'
const
PVCD_URL
=
'
http://ftp.wwpdb.org/pub/pdb/data/monomers/aa-variants-v1.cif
'
const
BIRD_URL
=
'
http://ftp.wwpdb.org/pub/pdb/data/bird/prd/prd-all.cif.gz
'
const
parser
=
new
argparse
.
ArgumentParser
({
const
parser
=
new
argparse
.
ArgumentParser
({
addHelp
:
true
,
addHelp
:
true
,
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment