'Import OWL data from URL', 'page callback' => 'drupal_get_form', 'page arguments' => array('owl_import_form'), 'access callback' => 'access_function',); $items['importgene'] = array( 'title' => 'Import gene data from TSV files', 'page callback' => 'drupal_get_form', 'page arguments' => array('gene_import_form'), 'access callback' => 'access_function',); $items['autocomplete/species'] = array( 'page callback' => 'autocomplete_species', 'type' => MENU_CALLBACK, 'access callback' => 'access_function',); $items['autocomplete/sources'] = array( 'page callback' => 'autocomplete_sources', 'type' => MENU_CALLBACK, 'access callback' => 'access_function',); $items['autocomplete/genes'] = array( 'page callback' => 'autocomplete_genes', 'type' => MENU_CALLBACK, 'access callback' => 'access_function',); $items['autocomplete/aspect'] = array( 'page callback' => 'autocomplete_aspect', 'type' => MENU_CALLBACK, 'access callback' => 'access_function',); $items['autocomplete/ontology_term'] = array( 'page callback' => 'autocomplete_ontology_term', 'type' => MENU_CALLBACK, 'access callback' => 'access_function',); $items['autocomplete/ontology_term_extended'] = array( 'page callback' => 'autocomplete_ontology_term_extended', 'type' => MENU_CALLBACK, 'access callback' => 'access_function',); $items['autocomplete/evidence_code'] = array( 'page callback' => 'autocomplete_evidence_code', 'type' => MENU_CALLBACK, 'access callback' => 'access_function',); $items['importgaf'] = array( 'title' => 'Import GAF data', 'page callback' => 'drupal_get_form', 'page arguments' => array('gaf_import_form'), 'access callback' => 'access_function',); $items['updatesources'] = array( 'title' => 'Update source data', 'page callback' => 'drupal_get_form', 'page arguments' => array('update_sources_form'), 'access callback' => 'access_function',); $items['exportgaf'] = array( 'title' => 'Export selected annotations to a gaf file', 'page callback' => 'drupal_get_form', 'page arguments' => 
array('gaf_export_form'), 'access callback' => 'access_function',); $items['browse_annotations'] = array( 'title' => 'Annotations', 'page callback' => 'browse_annotation_page', 'type' => MENU_NORMAL_ITEM, 'access callback' => 'access_function',); $items['clear_malformed_genes'] = array( 'title' => 'Clear Malformed Genes', 'page callback' => 'drupal_get_form', 'page arguments' => array('clear_malformed_genes_form'), 'access callback' => 'access_function',); $items['translate_gaf_id'] = array( 'title' => 'Translate GAF file ID', 'page callback' => 'drupal_get_form', 'page arguments' => array('translate_gaf_id_form'), 'access callback' => 'access_function',); $items['add_gene_synonyms'] = array( 'title' => 'Add Gene Synonyms', 'page callback' => 'drupal_get_form', 'page arguments' => array('add_gene_synonyms_form'), 'access callback' => 'access_function',); $items['update_gene_counts'] = [ 'title' => 'Update Taxon Gene Counts', 'page callback' => 'drupal_get_form', 'page arguments' => ['update_gene_counts_form'], 'access callback' => 'access_function',]; $items['update_gene_ontology_terms'] = [ 'title' => 'Update Gene Ontology term lists', 'page callback' => 'drupal_get_form', 'page arguments' => ['update_gene_ontology_terms_form'], 'access callback' => 'access_function',]; return $items; } /*************ANNOTATION VIEWER*************/ function browse_annotation_page(){ $filter_form = drupal_get_form('filter_annotation_form'); $browse_form = drupal_get_form('browse_annotation_form'); // $ecodes = taxonomy_vocabulary_machine_name_load('evidence_code'); // $ecodes_array = taxonomy_get_tree($ecodes->vid); // $ecodes_legend = '
';var_dump($num_items);exit;
}
/**
 * Submit handler: sends the selected annotations to the browser as a GAF file.
 *
 * Looks up every selected annotation node by title, renders it with
 * annotation_to_gaf_line(), writes the result to a temporary public file,
 * streams that file as a download, deletes it and terminates the request.
 *
 * @param $form
 *   The submitted form.
 * @param &$form_state
 *   Form state; the 'tableselect' and 'gaf_notes' values are read.
 */
function browse_annotation_form_submit_selected($form, &$form_state){
  // array_filter() drops the falsy entries, leaving only the chosen rows.
  $selected = array_filter($form_state['values']['tableselect']);

  $gaf_data = "!gaf-version: 2.0\n!date: ".date("Y/m/d")."\n";
  $gaf_data.= format_gaf_text(variable_get('export_gaf_header_value'));
  $gaf_data.= format_gaf_text($form_state['values']['gaf_notes']);
  $gaf_data.= "!DB\tDB Object ID\tDB Object Symbol\tQualifier\tGO ID\tDB:Reference\tEvidence Code\tWith (or) From\tAspect\tDB Object Name\tDB Object Synonym\tDB Object Type\tTaxon\tDate\tAssigned By\tAnnotation Extension\tGene Product Form ID\n";

  foreach($selected as $ann_acc){
    $query = new EntityFieldQuery();
    $query->entityCondition('entity_type','node')
      ->entityCondition('bundle', 'annotation')
      ->propertyCondition('title',$ann_acc);
    // Kept in its own variable so the selection being iterated is never
    // shadowed by the per-row query result.
    $matches = $query->execute();
    // execute() returns no 'node' key when nothing matched.
    if(!empty($matches['node'])){
      // Use the first matching node, as before, without passing a temporary
      // to a by-reference function.
      $annotation = reset($matches['node']);
      $gaf_data .= annotation_to_gaf_line($annotation->nid);
    }
  }

  $filename = 'planteome_gaf_'.format_date(time(), 'custom', 'YMdHi').'.assoc';
  $file = file_save_data($gaf_data, 'public://'.$filename);
  if(!$file){
    // file_save_data() returns FALSE on failure; do not dereference it.
    drupal_set_message(t('The GAF export file could not be written.'), 'error');
    return;
  }

  header('Content-Description: File Transfer');
  header('Content-Type: '.$file->filemime);
  header('Content-Disposition: attachment; filename="'.$file->filename.'"');
  header('Expires: 0');
  header('Cache-Control: must-revalidate');
  header('Pragma: public');
  header('Content-Length: '.$file->filesize);
  readfile($file->uri);
  // The file only exists to be streamed; remove it before ending the request.
  file_delete($file);
  exit;
}
/**
 * Turns free text into GAF comment lines.
 *
 * Every "\n"-separated piece of $text is prefixed with "!" and terminated
 * with "\n"; an empty string therefore yields a single "!\n" line.
 *
 * @param $text
 *   The text to convert.
 *
 * @return
 *   The text with each line written as a GAF comment line.
 */
function format_gaf_text($text){
  $commented = array();
  foreach(explode("\n", $text) as $piece){
    $commented[] = "!".$piece;
  }
  return implode("\n", $commented)."\n";
}
//TODO 2 - Lakshit
//will need a batch process
/**
 * Returns the nid of the first node of a bundle with the given title.
 *
 * @param $bundle
 *   Node bundle (content type) to search.
 * @param $title
 *   Exact node title to match.
 *
 * @return
 *   The node id, or FALSE when no node matches.
 */
function _browse_annotation_nid_by_title($bundle, $title){
  $lookup = new EntityFieldQuery();
  $lookup->entityCondition('entity_type','node')
    ->entityCondition('bundle', $bundle)
    ->propertyCondition('title', $title);
  $found = $lookup->execute();
  if(empty($found['node'])){
    return FALSE;
  }
  $nids = array_keys($found['node']);
  return array_shift($nids);
}

/**
 * Submit handler: exports every annotation matching the URL filters as GAF.
 *
 * Reads the optional 'gene', 'species', 'aspect' and 'evidence_code' query
 * parameters, narrows the annotation query accordingly, renders each result
 * with annotation_to_gaf_line() and streams the file as a download. A filter
 * whose value cannot be resolved is ignored, as in the original code.
 *
 * @param $form
 *   The submitted form.
 * @param &$form_state
 *   Form state; the 'gaf_notes' value is read.
 */
function browse_annotation_form_submit_all($form, &$form_state){
  $query = new EntityFieldQuery();
  $query->entityCondition('entity_type','node')
    ->entityCondition('bundle', 'annotation');

  if(!empty($_GET['gene'])){
    $gene_nid = _browse_annotation_nid_by_title('gene', $_GET['gene']);
    if($gene_nid !== FALSE){
      $query->fieldCondition('field_gene','target_id',$gene_nid,'=');
    }
  }
  if(!empty($_GET['species'])){
    $species_nid = _browse_annotation_nid_by_title('taxon', $_GET['species']);
    if($species_nid !== FALSE){
      $query->fieldCondition('field_ref_species','target_id',$species_nid,'=');
    }
  }
  if(!empty($_GET['aspect'])){
    // taxonomy_term_load() returns FALSE for an unknown term id.
    $term = taxonomy_term_load($_GET['aspect']);
    if($term){
      $query->fieldCondition('field_aspect','tid',$term->tid,'=');
    }
  }
  if(!empty($_GET['evidence_code'])){
    $term = taxonomy_term_load($_GET['evidence_code']);
    if($term){
      $query->fieldCondition('field_evidence_code','tid',$term->tid,'=');
    }
  }
  $results = $query->execute();

  if(empty($results['node'])){
    drupal_set_message(t('No annotations matched the current filters.'), 'warning');
    return;
  }

  $gaf_data = "!gaf-version: 2.0\n!date: ".date("Y/m/d")."\n";
  $gaf_data.= format_gaf_text(variable_get('export_gaf_header_value'));
  $gaf_data.= format_gaf_text($form_state['values']['gaf_notes']);
  $gaf_data.= "!DB\tDB Object ID\tDB Object Symbol\tQualifier\tGO ID\tDB:Reference\tEvidence Code\tWith (or) From\tAspect\tDB Object Name\tDB Object Synonym\tDB Object Type\tTaxon\tDate\tAssigned By\tAnnotation Extension\tGene Product Form ID\n";
  foreach($results['node'] as $annotation){
    $gaf_data .= annotation_to_gaf_line($annotation->nid);
  }

  $filename = 'planteome_gaf_'.format_date(time(), 'custom', 'YMdHi').'.assoc';
  $file = file_save_data($gaf_data, 'public://'.$filename);
  if(!$file){
    // file_save_data() returns FALSE on failure; do not dereference it.
    drupal_set_message(t('The GAF export file could not be written.'), 'error');
    return;
  }

  header('Content-Description: File Transfer');
  header('Content-Type: '.$file->filemime);
  header('Content-Disposition: attachment; filename="'.$file->filename.'"');
  header('Expires: 0');
  header('Cache-Control: must-revalidate');
  header('Pragma: public');
  header('Content-Length: '.$file->filesize);
  readfile($file->uri);
  // The file only exists to be streamed; remove it before ending the request.
  file_delete($file);
  exit;
}
/*************OWL FORM*************/
/**
 * Form builder for the OWL import page.
 *
 * Offers one required text field for the OWL URL and a submit button.
 *
 * @param $form
 *   The form array to extend.
 * @param &$form_state
 *   Reference to the state of the form (not used by the builder).
 *
 * @return
 *   The form array with the 'input' and 'submit' elements set.
 */
function owl_import_form($form, &$form_state) {
  $form['input'] = [
    '#type' => 'textfield',
    '#title' => 'Enter OWL url',
    '#required' => TRUE,
  ];

  $form['submit'] = [
    '#type' => 'submit',
    '#value' => t('Submit'),
  ];

  return $form;
}
/**
 * Validation handler for owl_import_form(); currently performs no checks.
 */
function owl_import_form_validate($form, &$form_state) {
  // Nothing to validate here.
}
/**
 * Submit handler for owl_import_form().
 *
 * Parses the OWL document at the submitted URL, passes the parsed entities
 * to get_children() with 1 as first argument, and hands the result to
 * iterate().
 */
function owl_import_form_submit($form, &$form_state) {
  $owl_entities = parse_owl($form_state['values']['input']);
  $bio_tree = get_children(1, $owl_entities);
  iterate($bio_tree);
}
/*************GENE IMPORT FORM*************/
/**
 * Form builder for the gene import page.
 *
 * Provides upload fields for a gene file and an alias file (each with a
 * checkbox selecting the alternative parser), required autocomplete text
 * fields for the species and the source, and a submit button.
 *
 * @param $form
 *   The form array to extend.
 * @param &$form_state
 *   Reference to the state of the form (not used by the builder).
 *
 * @return
 *   The completed form array.
 */
function gene_import_form($form, &$form_state) {
  $form['gene_file'] = [
    '#type' => 'file',
    '#title' => t('Upload gene file'),
  ];
  $form['gene_file_alt'] = [
    '#type' => 'checkbox',
    '#title' => t('Use alternative file parser? (for files from UNIX/Mac OS X+)'),
  ];

  $form['alias_file'] = [
    '#type' => 'file',
    '#title' => t('Upload alias file'),
  ];
  $form['alias_file_alt'] = [
    '#type' => 'checkbox',
    '#title' => t('Use alternative file parser? (for files from UNIX/Mac OS X+)'),
  ];

  // The submit handler reads the species name from the 'data' element.
  $form['data'] = [
    '#type' => 'textfield',
    '#title' => 'Enter species.',
    '#required' => TRUE,
    '#autocomplete_path' => 'autocomplete/species',
  ];
  $form['source'] = [
    '#type' => 'textfield',
    '#title' => 'Enter source.',
    '#required' => TRUE,
    '#autocomplete_path' => 'autocomplete/sources',
  ];

  $form['submit'] = [
    '#type' => 'submit',
    '#value' => t('Submit'),
  ];

  return $form;
}
/**
 * Validation handler for gene_import_form(); currently performs no checks.
 */
function gene_import_form_validate($form, &$form_state) {
  // Nothing to validate here.
}
/**
 * Submit handler for gene_import_form().
 *
 * Resolves the selected species to a taxon node id, reads the uploaded gene
 * and alias files, (re)creates the helper tables and queues a batch that
 * first inserts the aliases and then imports the genes in chunks of 100 rows.
 *
 * The input is checked before any table is touched: an unknown species or a
 * missing gene file aborts with an error message. The alias file is treated
 * as optional; when it is absent no alias operations are queued.
 *
 * @param $form
 *   The submitted form.
 * @param &$form_state
 *   Form state; the 'data', 'source', 'gene_file_alt' and 'alias_file_alt'
 *   values are read.
 */
function gene_import_form_submit($form, &$form_state){
  $query = new EntityFieldQuery();
  $query->entityCondition('entity_type','node')
    ->entityCondition('bundle','taxon')
    ->propertyCondition('type',array('taxon'))
    ->propertyCondition('title', $form_state['values']['data']);
  //this will be updated when we change the context, and can likely be moved outside the batch process altogether
  $results = $query->execute();
  // execute() returns no 'node' key when the species title is unknown.
  if(empty($results['node'])){
    drupal_set_message(t('Unknown species: @name', array('@name' => $form_state['values']['data'])), 'error');
    return;
  }
  $taxon_nids = array_keys($results['node']);
  $species_nid = $taxon_nids[0];
  $source_name = $form_state['values']['source'];

  $file1 = file_save_upload('gene_file', array(
    'file_validate_extensions' => array('txt'),
  ));
  // file_save_upload() returns FALSE on error and NULL when nothing was sent.
  if(!$file1){
    drupal_set_message(t('Please upload a gene file (.txt).'), 'error');
    return;
  }
  $file2 = file_save_upload('alias_file', array(
    'file_validate_extensions' => array('txt'),
  ));

  //get the content in the file, then remove the file in temp and the pointer in the database
  $content1 = file_get_contents($file1->uri);
  file_delete($file1);
  if($form_state['values']['gene_file_alt']){
    //use UNIX/MACOSX \n
    $arr1 = tabbed_file_to_array_alt($content1);
  } else {
    //use windows \r\n
    $arr1 = tabbed_file_to_array($content1);
  }

  $arr2 = array();
  if($file2){
    $content2 = file_get_contents($file2->uri);
    file_delete($file2);
    if($form_state['values']['alias_file_alt']){
      //use UNIX/MACOSX \n
      $arr2 = tabbed_file_to_array_alt($content2);
    } else {
      //use windows \r\n
      $arr2 = tabbed_file_to_array($content2);
    }
  }

  //TODO 3- Lakshit
  // - add a new form to reset the accession table
  // - this should require a password
  if(db_table_exists('accession') == FALSE){
    $schema = array(
      'description' => 'Table to track accession incrementation',
      'fields' => array(
        'accession' => array(
          'type' => 'int',
          'not null' => TRUE,
          'description' => 'Primary Key: Unique aggregator static media asset ID.'),),
      'primary key' => array('accession'),
    );
    db_create_table('accession',$schema);
    // Seed the counter with its first value.
    db_insert('accession')
      ->fields(array(
        'accession' => 1,
      ))
      ->execute();
  }

  // tmptable is rebuilt from scratch for every import.
  if(db_table_exists('tmptable')){
    db_drop_table('tmptable');
  }
  $schema = array(
    'description' => 'tmp table',
    'fields' => array(
      'pid' => array(
        'type' => 'serial',
        'not null' => TRUE,
        'description' => 'Primary Key: Unique aggregator static media asset ID.'),
      'gene_stable_id' => array(
        'type' => 'varchar',
        'length' => 255,
        'not null' => TRUE,
        'default' => '',),
      'gene_symbol' => array(
        'type' => 'varchar',
        'length' => 255,
        'not null' => FALSE,
        'default' => '',),
      'gene_name' => array(
        'type' => 'varchar',
        'length' => 255,
        'not null' => FALSE,),),
    'primary key' => array('pid'),
  );
  db_create_table('tmptable',$schema);

  // Alias operations are queued first so they run before the gene import.
  $operations = array();
  foreach(array_chunk($arr2, 100) as $chunk2){
    $operations[] = array('insert_gene_aliases', array($chunk2));
  }
  foreach(array_chunk($arr1, 100) as $chunk1){
    $operations[] = array('import_gene_data_to_node', array($chunk1, $species_nid, $source_name));
  }
  $batch = array(
    'operations' => $operations,
    'finished' => 'biomart_process_finished',
    'title' => t('processing biomart gene batch'),
    'init_message' => t('starting biomart batch process'),
    'progress_message' => t('processed @current out of @total')
  );
  batch_set($batch);
}
/*************GAF FORM*************/
/**
 * Form builder for the GAF import page.
 *
 * Provides a GAF file upload, a required text field describing the gene ID
 * syntax, an optional URL text field and a submit button.
 *
 * @param $form
 *   The form array to extend.
 * @param &$form_state
 *   Reference to the state of the form (not used by the builder).
 *
 * @return
 *   The completed form array.
 */
function gaf_import_form($form, &$form_state) {
  $form['gaf_file'] = [
    '#type' => 'file',
    '#title' => t('Upload gaf file'),
  ];

  $form['id_syntax'] = [
    '#type' => 'textfield',
    '#title' => 'Enter the syntax for gene ID present in the uploaded annotation file, using # to represent a number. Example: AT#G#####',
    '#required' => TRUE,
  ];

  $form['gaf_url'] = [
    '#type' => 'textfield',
    '#title' => 'Enter URL.',
  ];

  $form['submit'] = [
    '#type' => 'submit',
    '#value' => t('Submit'),
  ];

  return $form;
}
/**
 * Validation handler for gaf_import_form(); currently performs no checks.
 */
function gaf_import_form_validate($form, &$form_state) {
  // Nothing to validate here.
}
/**
 * Submit handler for gaf_import_form().
 *
 * Reads the uploaded GAF file, splits it into lines and queues a batch that
 * passes the lines, 50 at a time, to import_annotation_to_node() together
 * with the submitted gene ID syntax. Creates and seeds the
 * 'annotation_accession' table on first use. The 'gaf_url' value of the form
 * is not read here.
 *
 * @param $form
 *   The submitted form.
 * @param &$form_state
 *   Form state; the 'id_syntax' value is read.
 */
function gaf_import_form_submit($form, &$form_state){
  $file = file_save_upload('gaf_file', array(
    'file_validate_extensions' => array('assoc'),
  ));
  // file_save_upload() returns FALSE on error and NULL when nothing was sent;
  // stop before touching the database or dereferencing the result.
  if(!$file){
    drupal_set_message(t('Please upload a GAF file (.assoc).'), 'error');
    return;
  }

  //TODO 4 - Lakshit
  // - see TODO 3
  if(db_table_exists('annotation_accession') == FALSE){
    $schema = array(
      'description' => 'Table to track annotation accession incrementation',
      'fields' => array(
        'accession' => array(
          'type' => 'int',
          'not null' => TRUE,
          'description' => 'Primary Key'),),
      'primary key' => array('accession'),
    );
    db_create_table('annotation_accession',$schema);
    // Seed the counter with its first value.
    db_insert('annotation_accession')
      ->fields(array(
        'accession' => 1,
      ))
      ->execute();
  }

  $content = file_get_contents($file->uri);
  file_delete($file);

  $chunk_amount = 50;
  $gaf_rows = explode("\n", $content);
  $id_syntax = $form_state['values']['id_syntax'];
  $operations = array();
  foreach(array_chunk($gaf_rows, $chunk_amount) as $chunk){
    $operations[] = array('import_annotation_to_node', array($chunk,$id_syntax));
  }
  $batch = array(
    'operations' => $operations,
    'finished' => 'gaf_process_finished',
    'title' => t('processing gene annotation batch'),
    'init_message' => t('starting gaf batch process'),
    'progress_message' => t('processed @current out of @total')
  );
  batch_set($batch);
}
/*************GAF EXPORT FORM*************/
/**
 * Form builder for the GAF export page.
 *
 * Provides a single text field and a submit button. The submit handler
 * splits the text value on "|" to obtain annotation ids.
 *
 * @param $form
 *   The form array to extend.
 * @param &$form_state
 *   Reference to the state of the form (not used by the builder).
 *
 * @return
 *   The completed form array.
 */
function gaf_export_form($form, &$form_state) {
  $form['text'] = [
    '#type' => 'textfield',
    '#title' => t('title'),
  ];

  $form['submit'] = [
    '#type' => 'submit',
    '#value' => t('submit'),
  ];

  return $form;
}
/**
 * Validation handler for gaf_export_form(); currently performs no checks.
 */
function gaf_export_form_validate($form, &$form_state) {
  // Nothing to validate here.
}
//TODO 5 - Lakshit
// - Please verify that this is working correctly with a batch process
/**
 * Submit handler for gaf_export_form().
 *
 * Splits the submitted text on "|" into annotation ids, renders each one
 * with annotation_to_gaf_line() below a GAF 2.0 header, saves the result to
 * a temporary public file, prints the file contents, deletes the file and
 * terminates the request.
 *
 * @param $form
 *   The submitted form.
 * @param &$form_state
 *   Form state; the 'text' value is read.
 */
function gaf_export_form_submit($form, &$form_state){
  // Drop blank entries so a trailing or doubled "|" does not request an
  // annotation with an empty id.
  $annotation_ids = array_filter(
    array_map('trim', explode("|", $form_state['values']['text'])),
    'strlen'
  );

  // The header ends with a newline, as in the other GAF exporters in this
  // file, so the first annotation record is not glued to the date line.
  $gaf_data = "!gaf-version: 2.0\n!This data in GAF 2.0 format is provided by the NSF-funded Planteome project (www.planteome.org).\n!Date: ".date("Y/m/d")."\n";
  foreach($annotation_ids as $annotation_id){
    $gaf_data .= annotation_to_gaf_line($annotation_id);
  }

  $filename = 'planteome_gaf_'.format_date(time(), 'custom', 'YMdHi').'.assoc';
  $file = file_save_data($gaf_data, 'public://'.$filename);
  if(!$file){
    // file_save_data() returns FALSE on failure; do not dereference it.
    drupal_set_message(t('The GAF export file could not be written.'), 'error');
    return;
  }

  echo(file_get_contents($file->uri));
  file_delete($file);
  exit;
  // NOTE: a batch-based variant (chunks of 40 ids passed to an 'export'
  // operation) was sketched here previously but never enabled; see TODO 5.
}
/*************SOURCES FORM*************/
/**
 * Form builder for the source update page.
 *
 * The form consists of a single submit button that triggers the update.
 *
 * @param $form
 *   The form array to extend.
 * @param &$form_state
 *   Reference to the state of the form (not used by the builder).
 *
 * @return
 *   The form array with the 'submit' element set.
 */
function update_sources_form($form, &$form_state) {
  $form['submit'] = [
    '#type' => 'submit',
    '#value' => t('Click to run the source update script.'),
  ];

  return $form;
}
/**
 * Validation handler for update_sources_form(); currently performs no checks.
 */
function update_sources_form_validate($form, &$form_state) {
  // Nothing to validate here.
}
/**
 * Submit handler for update_sources_form().
 *
 * Runs update_sources() and then shows a confirmation message.
 */
function update_sources_form_submit($form, &$form_state) {
  update_sources();

  $notice = t('Sources have been updated.');
  drupal_set_message($notice);
}
/*************GENE COUNT FORM*************/
/**
 * Form builder for the taxon gene count update page.
 *
 * @param $form
 *   The form array to extend.
 * @param &$form_state
 *   Reference to the state of the form (not used by the builder).
 *
 * @return
 *   The form array with a single 'submit' button.
 */
function update_gene_counts_form($form, &$form_state) {
  $form['submit'] = array(
    '#type' => 'submit',
    '#value' => t('Click to run the gene count update script'),
  );

  return $form;
}
/**
 * Validation handler for update_gene_counts_form(); currently performs no checks.
 */
function update_gene_counts_form_validate($form, &$form_state) {
  // Nothing to validate here.
}
/**
 * Submit handler for update_gene_counts_form().
 *
 * Loads every taxon node stub and queues a batch that recalculates the gene
 * count of the taxa, 50 per operation, via update_taxon_gene_count().
 *
 * @param $form
 *   The submitted form.
 * @param &$form_state
 *   Form state (not read).
 */
function update_gene_counts_form_submit($form, &$form_state){
  $query = new EntityFieldQuery();
  $query->entityCondition('entity_type', 'node')
    ->entityCondition('bundle', 'taxon')
    ->propertyCondition('type', ['taxon']);
  $results = $query->execute();

  // execute() returns no 'node' key when there are no taxa at all.
  if(empty($results['node'])){
    drupal_set_message(t('No taxon nodes found; nothing to update.'), 'warning');
    return;
  }

  $operations = [];
  foreach(array_chunk($results['node'], 50) as $chunk){
    $operations[] = ['update_taxon_gene_count', [$chunk]];
  }

  // Each message needs its own key; repeating 'title' three times kept only
  // the last value and left the init and progress messages unset.
  $batch = [
    'operations' => $operations,
    'finished' => 'batch_update_finished',
    'title' => t('processing gene count update'),
    'init_message' => t('starting taxon gene count update process'),
    'progress_message' => t('processed @current out of @total'),
  ];
  batch_set($batch);
}
/**
 * Batch operation: stores the number of gene nodes on each taxon in a chunk.
 *
 * For every taxon stub the full node is loaded, the gene nodes referencing
 * it through field_ref_species are counted, and the count is written to
 * field_gene_count before the taxon is saved.
 *
 * @param $chunk
 *   Array of objects carrying a taxon 'nid' property.
 * @param &$context
 *   Batch context (not used).
 */
function update_taxon_gene_count($chunk, &$context){
  foreach($chunk as $taxon_stub){
    $taxon = node_load($taxon_stub->nid);
    // node_load() returns FALSE when the node no longer exists.
    if(!$taxon){
      continue;
    }
    $query = new EntityFieldQuery();
    $query->entityCondition('entity_type', 'node')
      ->entityCondition('bundle', 'gene')
      ->fieldCondition('field_ref_species', 'target_id', $taxon->nid, '=');
    $results = $query->execute();
    // A taxon without genes yields no 'node' key; store 0 in that case
    // instead of counting an undefined index.
    $gene_count = isset($results['node']) ? count($results['node']) : 0;
    $taxon->field_gene_count[$taxon->language][0]['value'] = $gene_count;
    node_save($taxon);
  }
}
/**
 * Batch 'finished' callback shared by the update batches in this file.
 *
 * Reports the outcome through the message queue so it is shown on the page
 * the user lands on, instead of printing into the batch response.
 *
 * @param $success
 *   TRUE when every operation completed without a fatal error.
 * @param $results
 *   Results collected by the operations (not used).
 * @param $operations
 *   Operations that remained unprocessed (not used).
 */
function batch_update_finished($success, $results, $operations) {
  if ($success) {
    //Let the user know we have finished!
    drupal_set_message(t('Finished updating!'));
  }
  else {
    drupal_set_message(t('The update did not complete successfully.'), 'error');
  }
}
/*************GENE ONTOLOGY TERMS FORM*************/
/**
 * Form builder for the gene ontology term update page.
 *
 * @param $form
 *   The form array to extend.
 * @param &$form_state
 *   Reference to the state of the form (not used by the builder).
 *
 * @return
 *   The form array with a single 'submit' button.
 */
function update_gene_ontology_terms_form($form, &$form_state) {
  $form['submit'] = array(
    '#type' => 'submit',
    '#value' => t('Click to run the gene ontology term update script'),
  );

  return $form;
}
/**
 * Validation handler for update_gene_ontology_terms_form(); currently performs no checks.
 */
function update_gene_ontology_terms_form_validate($form, &$form_state) {
  // Nothing to validate here.
}
/**
 * Submit handler for update_gene_ontology_terms_form().
 *
 * Loads every gene node stub and queues a batch that rebuilds the ontology
 * term list of the genes, 50 per operation, via update_gene_ontology_terms().
 *
 * @param $form
 *   The submitted form.
 * @param &$form_state
 *   Form state (not read).
 */
function update_gene_ontology_terms_form_submit($form, &$form_state){
  $query = new EntityFieldQuery();
  $query->entityCondition('entity_type', 'node')
    ->entityCondition('bundle', 'gene')
    ->propertyCondition('type', ['gene']);
  $results = $query->execute();

  // execute() returns no 'node' key when there are no genes at all.
  if(empty($results['node'])){
    drupal_set_message(t('No gene nodes found; nothing to update.'), 'warning');
    return;
  }

  $operations = [];
  foreach(array_chunk($results['node'], 50) as $chunk){
    $operations[] = ['update_gene_ontology_terms', [$chunk]];
  }

  // Each message needs its own key; repeating 'title' three times kept only
  // the last value and left the init and progress messages unset.
  $batch = [
    'operations' => $operations,
    'finished' => 'batch_update_finished',
    'title' => t('processing gene ontology terms update'),
    'init_message' => t('starting gene ontology terms update process'),
    'progress_message' => t('processed @current out of @total'),
  ];
  batch_set($batch);
}
/**
 * Batch operation: rebuilds the comma-separated ontology term list of genes.
 *
 * For every gene stub in the chunk the annotations referencing the gene via
 * field_gene are loaded, their field_ontology_id values are collected
 * without duplicates in first-seen order, and the joined list is saved to
 * field_gene_ontology_terms. Genes without annotations or without any term
 * are left untouched.
 *
 * @param $chunk
 *   Array of objects carrying a gene 'nid' property.
 * @param &$context
 *   Batch context (not used).
 */
function update_gene_ontology_terms($chunk, &$context){
  foreach($chunk as $gene_stub){
    $gene = node_load($gene_stub->nid);
    // node_load() returns FALSE when the node no longer exists.
    if(!$gene){
      continue;
    }
    $query = new EntityFieldQuery();
    $query->entityCondition('entity_type', 'node')
      ->entityCondition('bundle', 'annotation')
      ->fieldCondition('field_gene', 'target_id', $gene->nid, '=');
    $results = $query->execute();
    if(!isset($results['node'])){
      continue;
    }

    // Terms are tracked as array keys so duplicates are detected by exact
    // match; a substring search would wrongly drop a term that is merely
    // contained in a longer, previously seen term.
    $seen_terms = array();
    foreach($results['node'] as $result){
      $annotation = node_load($result->nid);
      if(!$annotation || !isset($annotation->field_ontology_id['und'][0]['value'])){
        continue;
      }
      $term = $annotation->field_ontology_id['und'][0]['value'];
      if($term !== ''){
        $seen_terms[$term] = TRUE;
      }
    }

    if(!empty($seen_terms)){
      $gene->field_gene_ontology_terms[$gene->language][0]['value'] = implode(',', array_keys($seen_terms));
      node_save($gene);
    }
  }
}
/***************PARSERS******************/
/**
 * Downloads a GAF document and parses its annotation rows.
 *
 * The document is fetched with curl_get_contents() and passed through
 * htmlspecialchars(), so the returned field values are HTML-escaped. Only
 * rows with exactly 17 tab-separated columns whose first column does not
 * start with "!" are returned.
 *
 * @param $gaf_url
 *   URL of the GAF document.
 *
 * @return
 *   List of associative arrays, one per annotation row, keyed by 'source',
 *   'object', 'object_symbol', 'annotation_qualifier', 'GO_ID', 'reference',
 *   'evidence', 'withfrom', 'aspect', 'synonyms1', 'synonyms2',
 *   'object_type', 'taxon_id', 'date', 'assigned_by',
 *   'annotation_extension' and 'gene_product_form_id'.
 */
function parse_gaf_url($gaf_url) {
  // Output keys in column order.
  $columns = array(
    'source', 'object', 'object_symbol', 'annotation_qualifier', 'GO_ID',
    'reference', 'evidence', 'withfrom', 'aspect', 'synonyms1', 'synonyms2',
    'object_type', 'taxon_id', 'date', 'assigned_by', 'annotation_extension',
    'gene_product_form_id',
  );

  $html_str = htmlspecialchars(curl_get_contents($gaf_url));
  $final_array = array();
  foreach(explode("\n", $html_str) as $annotation_string){
    $annotation = explode("\t", $annotation_string);
    if(count($annotation) != 17){
      continue;
    }
    // strncmp() is safe on an empty first column, where indexing the first
    // character directly raises an "uninitialized string offset" error.
    if(strncmp($annotation[0], '!', 1) === 0){
      continue;
    }
    // Exactly 17 values are guaranteed above, so the key list lines up.
    $final_array[] = array_combine($columns, $annotation);
  }
  return $final_array;
}
/**
 * Parses already-split GAF lines into associative annotation arrays.
 *
 * Lines beginning with "!" are skipped, as are lines whose tab-separated
 * columns are rejected by validate_gaf_line_array().
 *
 * @param $lines
 *   Array of GAF lines.
 *
 * @return
 *   List of associative arrays, one per accepted line, keyed by column name.
 */
function parse_gaf_lines($lines){
  // Column position => output key.
  $column_keys = array(
    0 => 'source',
    1 => 'object',
    2 => 'object_symbol',
    3 => 'annotation_qualifier',
    4 => 'GO_ID',
    5 => 'reference',
    6 => 'evidence',
    7 => 'withfrom',
    8 => 'aspect',
    9 => 'synonyms1',
    10 => 'synonyms2',
    11 => 'object_type',
    12 => 'taxon_id',
    13 => 'date',
    14 => 'assigned_by',
    15 => 'annotation_extension',
    16 => 'gene_product_form_id',
  );

  $parsed_rows = array();
  foreach($lines as $raw_line){
    $cells = explode("\t", $raw_line);
    // Comment lines are rejected before the validator is consulted.
    if(substr($raw_line, 0, 1) == '!' || !validate_gaf_line_array($cells)){
      continue;
    }
    $row = array();
    foreach($column_keys as $position => $key){
      $row[$key] = $cells[$position];
    }
    $parsed_rows[] = $row;
  }
  return $parsed_rows;
}
/**
 * Parses the full text of a GAF file into associative annotation arrays.
 *
 * The content is split on "\n"; lines beginning with "!" are skipped, as are
 * lines whose tab-separated columns are rejected by
 * validate_gaf_line_array(). The parser no longer prints the running row
 * count and memory usage for every accepted line; that debugging output was
 * written straight into the response.
 *
 * @param $gaf_file_content
 *   The GAF file content as one string.
 *
 * @return
 *   List of associative arrays, one per accepted line, keyed by column name.
 */
function parse_gaf_file($gaf_file_content){
  // Column position => output key.
  $column_keys = array(
    'source', 'object', 'object_symbol', 'annotation_qualifier', 'GO_ID',
    'reference', 'evidence', 'withfrom', 'aspect', 'synonyms1', 'synonyms2',
    'object_type', 'taxon_id', 'date', 'assigned_by', 'annotation_extension',
    'gene_product_form_id',
  );

  $final_array = array();
  foreach(explode("\n", $gaf_file_content) as $line){
    //skip comments
    if(substr($line, 0, 1) == '!'){
      continue;
    }
    $annotation = explode("\t", $line);
    if(!validate_gaf_line_array($annotation)){
      continue;
    }
    $parsed_array = array();
    foreach($column_keys as $position => $key){
      $parsed_array[$key] = $annotation[$position];
    }
    $final_array[] = $parsed_array;
  }
  return $final_array;
}
/**
* parse_owl($owl_url).
*
* Fetches an OWL document and extracts NCBITaxon entities from it.
*
* The document is downloaded with curl_get_contents() and HTML-escaped with
* htmlspecialchars(); every pattern is escaped the same way before matching.
* Only matches containing "NCBITaxon_<digits>" are turned into entities.
*
* NOTE(review): several pattern literals below ('//s' and '/ /s') contain no
* capture group even though $output[1] is read after them. They look
* truncated, as if the XML tags inside them were stripped - confirm against
* the original file before relying on this function.
*
* @param owl_url
* The URL with the OWL data to be parsed
*
* @return
* Array of entity arrays with the keys 'id', 'rank', 'label', 'parent' and
* 'synonyms' (holding 'related' and 'exact' lists). Entities whose rank is
* 'species' are placed at the front of the array, all others at the end.
*/
function parse_owl($owl_url){
$html_str = htmlspecialchars(curl_get_contents($owl_url));
//match on all class entities
$re = htmlspecialchars('//s');
preg_match_all($re, $html_str, $matches, PREG_SET_ORDER, 0);
$final_array = array();
foreach($matches as $index => $match){
// Only matches that mention an NCBITaxon identifier are processed.
if(preg_match('/NCBITaxon_\d+/',$match[0])) {
//create entity array
$parsed_array = array();
//Taxon ID - falls back to a placeholder string when the pattern fails
if(preg_match(htmlspecialchars('//s'), $match[0], $output)){
$parsed_array['id'] = $output[1]; //gets the capture group
} else {
$parsed_array['id'] = 'BAD NCBITAXONID';
}
//Rank - defaults to 'no rank'; ids 1 and 131567 are labelled 'root'
if(preg_match(htmlspecialchars('/ /s'), $match[0], $output)){
$parsed_array['rank'] = $output[1]; //gets the capture group
} else {
$parsed_array['rank'] = 'no rank';
if($parsed_array['id']==1||$parsed_array['id']==131567){
$parsed_array['rank'] = 'root';
}
}
//Label - will need to parse for genus/species when importing
if(preg_match(htmlspecialchars('/(.*?)<\/rdfs:label>/s'), $match[0], $output)){
$parsed_array['label'] = $output[1]; //gets the capture group
} else {
$parsed_array['label'] = 'BAD LABEL';
}
//Parent - falls back to a placeholder string when the pattern fails
if(preg_match(htmlspecialchars('/ /s'), $match[0], $output)){
$parsed_array['parent'] = $output[1]; //gets the capture group
} else {
$parsed_array['parent'] = 'BAD PARENT ID';
}
//Synonyms - start with placeholder entries that are replaced below when
//real synonyms are found
$parsed_array['synonyms']['related'][] = 'NO RELATED';
$parsed_array['synonyms']['exact'][] = 'NO EXACT';
//get related syns
// NOTE(review): "!== 0" is also true when preg_match_all() returns FALSE on
// error, in which case $related[1] may not be populated.
if(preg_match_all(htmlspecialchars('/(.*?)<\/oboInOwl:hasRelatedSynonym>/s'), $match[0], $related) !== 0){
$parsed_array['synonyms']['related'] = array();
foreach($related[1] as $synonym){
$parsed_array['synonyms']['related'][] = $synonym;
}
}
//get exact syns (same "!== 0" caveat as for the related synonyms)
if(preg_match_all(htmlspecialchars('/(.*?)<\/oboInOwl:hasExactSynonym>/s'), $match[0], $exact) !== 0){
$parsed_array['synonyms']['exact'] = array();
foreach($exact[1] as $synonym){
$parsed_array['synonyms']['exact'][] = $synonym;
}
}
// Species entries go to the front of the result, everything else to the end.
if($parsed_array['rank'] == 'species'){
array_unshift($final_array, $parsed_array);
} else {
array_push($final_array, $parsed_array);
}
}
}
return $final_array;
}
function parse_sources_yaml(){
//add dynamic url functionality if needed
$yaml_str = htmlspecialchars(curl_get_contents('https://raw.githubusercontent.com/Planteome/go-site-xrefs-fork/master/metadata/db-xrefs.yaml'));
preg_match_all('/base: .*?(?:- data|$)/s', $yaml_str, $db_matches);
$ret_arr = array();
foreach($db_matches[0] as $db_match){
preg_match('/base: (.*?) name: (.*?)(?:description|generic_url|synonyms).*?entity_types:(.*?)- data/s', $db_match, $name_and_ent_types_match);
//DB NAME
if( count($name_and_ent_types_match) != 0){
$source_data_array = array();
$source_machine_name = $name_and_ent_types_match[1];
$source_human_name = $name_and_ent_types_match[2];
$source_data_array['human_name'] = trim($source_human_name);
$source_data_array['machine_name'] = trim($source_machine_name);
$source_data_array['no_yaml_entities'] = 0;
$source_data_array['no_entity_type_name'] = 0;
$source_data_array['no_entity_id_syntax'] = 0;
$source_data_array['no_url_syntax'] = 0;
preg_match_all('/e_name: .*?(?:- typ|$)/s',$name_and_ent_types_match[3],$db_entity_types);
if( count($db_entity_types) > 0){
//go through each entity type to pull out data
for($x=0;$x 'textfield',
'#title' => 'Enter species.',
'#required' => TRUE,
'#autocomplete_path'=> 'autocomplete/species',
);
$form['password'] = array(
'#type' => 'password',
'#title' => 'Password'
);
$form['submit'] = array(
'#type' => 'submit',
'#value' => t('Click to remove ALL genes associated with the selected species.'),
);
return $form;
}
/**
 * Validation handler for clear_malformed_genes_form().
 *
 * Rejects the submission unless the 'password' value is exactly the expected
 * string. The error is attached to the 'password' element so that field is
 * the one flagged.
 *
 * NOTE(review): the expected password is a hard-coded literal in source
 * control. It does not protect this destructive form in any meaningful way;
 * consider a proper permission check instead.
 *
 * @param $form
 *   The form being validated.
 * @param &$form_state
 *   Form state; the 'password' value is read.
 */
function clear_malformed_genes_form_validate($form, &$form_state){
  $supplied = isset($form_state['values']['password']) ? (string) $form_state['values']['password'] : '';
  // Strict comparison: a loose "!=" lets non-string values such as integer 0
  // compare equal to the literal on older PHP versions.
  if($supplied !== 'password'){
    form_set_error('password', t('Cmon now.'));
  }
}
/**
 * Submit handler for clear_malformed_genes_form().
 *
 * Resolves the submitted species to a taxon node and queues a batch that
 * deletes every gene node referencing it, 40 per operation. When the species
 * is unknown or has no genes, a message is shown and no batch is queued.
 *
 * @param $form
 *   The submitted form.
 * @param &$form_state
 *   Form state; the 'data' value (species title) is read.
 */
function clear_malformed_genes_form_submit($form, &$form_state){
  $query = new EntityFieldQuery();
  $query->entityCondition('entity_type','node')
    ->entityCondition('bundle','taxon')
    ->propertyCondition('type',array('taxon'))
    ->propertyCondition('title', $form_state['values']['data']);
  $results = $query->execute();
  // execute() returns no 'node' key when the species title is unknown.
  if(empty($results['node'])){
    drupal_set_message(t('Unknown species: @name', array('@name' => $form_state['values']['data'])), 'error');
    return;
  }
  $taxon_nids = array_keys($results['node']);
  $species_nid = $taxon_nids[0];

  $q = new EntityFieldQuery();
  $q->entityCondition('entity_type','node')
    ->entityCondition('bundle','gene')
    ->propertyCondition('type',array('gene'))
    ->fieldCondition('field_ref_species', 'target_id', $species_nid, '=');
  $r = $q->execute();

  // Only report "nothing to remove" when that is actually the case; the
  // message used to be shown even after a deletion batch had been queued.
  if(empty($r['node'])){
    drupal_set_message(t('no genes to remove'));
    return;
  }

  $operations = array();
  foreach(array_chunk($r['node'], 40) as $chunk){
    $operations[] = array('delete_malformed_gene_batch', array($chunk));
  }
  $batch = array(
    'operations' => $operations,
    'finished' => 'clear_malformed_genes_process_finished',
    'title' => t('processing gene batch'),
    'init_message' => t('starting clear genes batch process'),
    'progress_message' => t('processed @current out of @total')
  );
  batch_set($batch);
}
/**
 * Batch operation: deletes every gene node referenced in the chunk.
 *
 * @param $chunk
 *   Array of objects carrying a gene 'nid' property.
 * @param &$context
 *   Batch context (not used).
 */
function delete_malformed_gene_batch($chunk, &$context) {
  foreach ($chunk as $stub) {
    $nid = $stub->nid;
    node_delete($nid);
  }
}
/**
 * Batch 'finished' callback for the gene clearing batch.
 *
 * Reports the outcome through the message queue so it is shown on the page
 * the user lands on, instead of printing into the batch response.
 *
 * @param $success
 *   TRUE when every operation completed without a fatal error.
 * @param $results
 *   Results collected by the operations (not used).
 * @param $operations
 *   Operations that remained unprocessed (not used).
 */
function clear_malformed_genes_process_finished($success, $results, $operations) {
  if ($success) {
    //Let the user know we have finished!
    drupal_set_message(t('Finished clearing genes'));
  }
  else {
    drupal_set_message(t('Clearing genes did not complete successfully.'), 'error');
  }
}
/**
 * Form builder for the gene synonym import page.
 *
 * Provides a species autocomplete field, a synonym file upload, a checkbox
 * selecting the alternative file parser and a submit button.
 *
 * @param $form
 *   The form array to extend.
 * @param &$form_state
 *   Reference to the state of the form (not used by the builder).
 *
 * @return
 *   The completed form array.
 */
function add_gene_synonyms_form($form, &$form_state) {
  $form['species'] = [
    '#type' => 'textfield',
    '#title' => 'Enter species.',
    '#autocomplete_path' => 'autocomplete/species',
  ];

  $form['synonym_file'] = [
    '#type' => 'file',
    '#title' => 'Upload synonym file',
  ];

  $form['synonym_file_alt'] = [
    '#type' => 'checkbox',
    '#title' => t('Use alternative file parser? (for files from UNIX/Mac OS X+)'),
  ];

  $form['submit'] = [
    '#type' => 'submit',
    '#value' => 'Submit',
  ];

  return $form;
}
/**
 * Validation handler for add_gene_synonyms_form(); currently performs no checks.
 */
function add_gene_synonyms_form_validate($form, &$form_state) {
  // Nothing to validate here.
}
/**
 * Submit handler for add_gene_synonyms_form().
 *
 * Reads the uploaded synonym file, parses it with the parser selected by the
 * 'synonym_file_alt' checkbox and queues a batch that passes the rows, 40 at
 * a time, to add_gene_synonym(). The 'species' value of the form is not read
 * here.
 *
 * @param $form
 *   The submitted form.
 * @param &$form_state
 *   Form state; the 'synonym_file_alt' value is read.
 */
function add_gene_synonyms_form_submit($form, &$form_state){
  $file = file_save_upload('synonym_file', array(
    'file_validate_extensions' => array('txt'),
  ));
  // file_save_upload() returns FALSE on error and NULL when nothing was sent.
  if(!$file){
    drupal_set_message(t('Please upload a synonym file (.txt).'), 'error');
    return;
  }
  $content = file_get_contents($file->uri);
  file_delete($file);

  if($form_state['values']['synonym_file_alt']){
    //use UNIX/MACOSX \n
    $arr1 = tabbed_file_to_array_alt($content);
  } else {
    //use windows \r\n
    $arr1 = tabbed_file_to_array($content);
  }

  $operations = array();
  foreach(array_chunk($arr1, 40) as $chunk){
    $operations[] = array('add_gene_synonym', array($chunk));
  }
  $batch = array(
    'operations' => $operations,
    'finished' => 'add_gene_synonym_process_finished',
    'title' => t('processing gene synonym batch'),
    'init_message' => t('starting gene synonym batch process'),
    'progress_message' => t('processed @current out of @total')
  );
  batch_set($batch);
}
/**
 * Batch 'finished' callback for the gene synonym batch.
 *
 * Uses the Drupal message queue rather than printing raw output, so the text
 * is rendered on the page shown after the batch.
 *
 * @param bool $success
 *   TRUE when every operation ran without a fatal error.
 * @param array $results
 *   Results gathered by the operations (unused).
 * @param array $operations
 *   Operations that were left unprocessed when the batch failed.
 */
function add_gene_synonym_process_finished($success, $results, $operations) {
  if ($success) {
    drupal_set_message(t('Finished adding!'));
  }
  else {
    drupal_set_message(t('Adding gene synonyms did not complete; @count operation(s) were not processed.', array('@count' => count($operations))), 'error');
  }
}
/**
 * Batch operation: appends synonyms to existing gene nodes.
 *
 * For each row the gene node whose field_gene_id equals the row's
 * 'gene_stable_id' is looked up and the row's 'synonym_to_add' is appended to
 * its field_synonyms. Rows that are incomplete, that match no gene, or whose
 * synonym is already stored on the node are skipped, so re-running an import
 * does not pile up duplicates.
 *
 * @param array $chunk
 *   Rows with the keys 'gene_stable_id' and 'synonym_to_add'.
 */
function add_gene_synonym($chunk){
  foreach ($chunk as $gene_syn) {
    if (empty($gene_syn['gene_stable_id']) || !isset($gene_syn['synonym_to_add']) || $gene_syn['synonym_to_add'] === '') {
      continue;
    }
    $synonym = $gene_syn['synonym_to_add'];

    $query = new EntityFieldQuery();
    $query->entityCondition('entity_type', 'node')
      ->entityCondition('bundle', 'gene')
      ->propertyCondition('type', array('gene'))
      ->fieldCondition('field_gene_id', 'value', $gene_syn['gene_stable_id']);
    $results = $query->execute();
    if (empty($results['node'])) {
      continue;
    }

    // Only the first matching gene is updated, as before.
    $nids = array_keys($results['node']);
    $node = node_load($nids[0]);
    if (empty($node)) {
      continue;
    }

    $already_present = FALSE;
    if (!empty($node->field_synonyms[$node->language])) {
      foreach ($node->field_synonyms[$node->language] as $item) {
        if (isset($item['value']) && $item['value'] === $synonym) {
          $already_present = TRUE;
          break;
        }
      }
    }
    if ($already_present) {
      continue;
    }

    $node->field_synonyms[$node->language][]['value'] = $synonym;
    $node = node_submit($node);
    node_save($node);
  }
}
//Translate GAF ID form
//NOTE
//currently half-developed, would need to add a batch to process target ids into tmptable, then another batch to build a file with translated target ids.
//Zea Mays genes were the ones that precipitated this, but they don't actually seem to overlap between the gene file and the translation file.
/**
 * Form builder for the GAF ID translation page.
 *
 * Adds two file inputs (the GAF file and the translation file) and a submit
 * button to the form.
 *
 * @param array $form
 *   Form array being built.
 * @param array $form_state
 *   Current form state (unused while building).
 *
 * @return array
 *   The form array with the three elements added.
 */
function translate_gaf_id_form($form, &$form_state){
  $elements = array(
    'gaf_file' => array(
      '#type' => 'file',
      '#title' => t('Upload gaf file'),
    ),
    'translate_file' => array(
      '#type' => 'file',
      '#title' => t('Upload translation file'),
    ),
    'submit' => array(
      '#type' => 'submit',
      '#value' => t('Submit'),
    ),
  );
  foreach ($elements as $element_key => $element) {
    $form[$element_key] = $element;
  }
  return $form;
}
/**
 * Validation handler for translate_gaf_id_form.
 *
 * Currently a placeholder: it performs no checks and sets no form errors.
 */
function translate_gaf_id_form_validate($form, &$form_state){
}
/**
 * Submit handler for translate_gaf_id_form.
 *
 * The translation feature is not implemented yet. Previously this handler
 * built a batch from an undefined $operations variable, dumped it with
 * var_dump() and called exit, which aborted the request. It now builds the
 * (currently empty) operation list explicitly and tells the user that nothing
 * was processed instead of terminating the page.
 *
 * TODO: queue one set of operations that loads the target ids from the
 * translation file into a temporary table, then a second set that writes a
 * GAF file with the translated ids. The commented-out code that used to live
 * here was copied from the gene and GAF import handlers and did not match
 * this form's fields (gaf_file, translate_file), so it was removed.
 *
 * @param array $form
 *   The submitted form.
 * @param array $form_state
 *   Form state of the submission.
 */
function translate_gaf_id_form_submit($form, &$form_state){
  $operations = array();

  if (empty($operations)) {
    drupal_set_message(t('GAF ID translation is not implemented yet; nothing was processed.'), 'warning');
    return;
  }

  $batch = array(
    'operations' => $operations,
    'finished' => 'translate_gaf_id_process_finished',
    'title' => t('translating annotation batch'),
    'init_message' => t('starting annotation translate batch process'),
    'progress_message' => t('processed @current out of @total'),
  );
  batch_set($batch);
}
/**
 * Batch 'finished' callback named by translate_gaf_id_form_submit().
 *
 * Currently a placeholder: it declares no parameters and does nothing.
 */
function translate_gaf_id_process_finished(){
}
/**
 * Checks that a split GAF line has 17 columns and its mandatory ones filled.
 *
 * @param array $array
 *   Column values of one line, indexed from 0.
 *
 * @return bool
 *   FALSE when the column count is not 17 or when any of the columns at
 *   indexes 0, 1, 2, 4, 5, 6, 8, 11, 12, 13 or 14 loosely equals the empty
 *   string; TRUE otherwise.
 */
function validate_gaf_line_array($array){
  if (count($array) != 17) {
    return FALSE;
  }
  $mandatory_indexes = array(0, 1, 2, 4, 5, 6, 8, 11, 12, 13, 14);
  foreach ($mandatory_indexes as $index) {
    if ($array[$index] == '') {
      return FALSE;
    }
  }
  return TRUE;
}
/**
 * Returns the first value of a field on an entity, or '' when it is unset.
 *
 * Helper for annotation_to_gaf_line(). The field is read in the entity's own
 * language; a value loosely equal to NULL is reported as ''.
 */
function _annotation_gaf_first_value($entity, $field_name) {
  $value = NULL;
  if (isset($entity->{$field_name}[$entity->language][0]['value'])) {
    $value = $entity->{$field_name}[$entity->language][0]['value'];
  }
  return ($value == NULL) ? '' : $value;
}

/**
 * Renders one annotation node as a 17-column, tab-separated GAF line.
 *
 * Fixes relative to the previous version:
 * - the with/from and synonym loops had unparsable loop headers; they are now
 *   plain foreach loops over the field items;
 * - the synonym loop iterates over every stored synonym (the old bound counted
 *   the top-level entries of the field array, not its values);
 * - the data-source lookup that read an undefined variable and whose result
 *   was never used has been removed;
 * - '|'-separated columns are built with implode(), so a skipped or empty
 *   entry no longer leaves a stray or leading separator;
 * - '' is returned when the gene or species node cannot be loaded, instead of
 *   emitting a line with empty mandatory columns.
 *
 * @param int $annotation_nid
 *   Node id of the annotation to export.
 *
 * @return string
 *   The GAF line terminated by "\n", or '' when the annotation is unpublished,
 *   has no usable publication reference, or lacks its gene/species node.
 */
function annotation_to_gaf_line($annotation_nid){
  $annotation = node_load($annotation_nid);
  if (empty($annotation) || $annotation->status != 1) {
    return '';
  }
  $lang = $annotation->language;

  // Column 6: one entry per referenced biblio node, PubMed id preferred.
  if (empty($annotation->field_publication[$lang])) {
    return '';
  }
  $reference_parts = array();
  foreach ($annotation->field_publication[$lang] as $item) {
    $biblio = node_load($item['target_id']);
    if (empty($biblio)) {
      continue;
    }
    if (!empty($biblio->biblio_pubmed_id)) {
      $reference_parts[] = 'PMID:'.$biblio->biblio_pubmed_id;
    }
    elseif (!empty($biblio->biblio_other_number)) {
      $reference_parts[] = $biblio->biblio_other_number;
    }
  }
  if (count($reference_parts) === 0) {
    return '';
  }

  // Column 8: "<source machine name>[:<object id>]" per with/from item.
  $with_parts = array();
  if (!empty($annotation->field_with_or_from[$lang])) {
    foreach ($annotation->field_with_or_from[$lang] as $item) {
      $fc_id = $item['value'];
      $loaded = entity_load('field_collection_item', array($fc_id));
      if (empty($loaded[$fc_id])) {
        continue;
      }
      $with_item = $loaded[$fc_id];
      $reference_term = taxonomy_term_load($with_item->field_source_name[$lang][0]['tid']);
      $machine_name = isset($reference_term->field_machine_name['und'][0]['value']) ? $reference_term->field_machine_name['und'][0]['value'] : '';
      if ($machine_name == '') {
        continue;
      }
      $entry = $machine_name;
      $object_id = isset($with_item->field_object_id[$lang][0]['value']) ? $with_item->field_object_id[$lang][0]['value'] : '';
      if ($object_id != '') {
        $entry .= ':'.$object_id;
      }
      $with_parts[] = $entry;
    }
  }

  $gene = node_load($annotation->field_gene[$lang][0]['target_id']);
  $species = node_load($annotation->field_ref_species[$lang][0]['target_id']);
  if (empty($gene) || empty($species)) {
    return '';
  }

  // Column 11 prefix: at most the first two '|'-separated pieces of each
  // stored synonym value, as before.
  $synonym_parts = array();
  if (!empty($gene->field_synonyms[$gene->language])) {
    foreach ($gene->field_synonyms[$gene->language] as $item) {
      $pieces = explode('|', $item['value']);
      $synonym_parts[] = $pieces[0];
      if (count($pieces) != 1) {
        $synonym_parts[] = $pieces[1];
      }
    }
  }
  $stored_synonyms = implode('|', $synonym_parts);

  $gene_id = $gene->field_gene_id[$gene->language][0]['value'];
  $gene_symbol = $gene->field_gene_symbol[$gene->language][0]['value'];
  $db_object_synonyms = $gene_id.'|'.$gene_symbol;
  if ($stored_synonyms != '') {
    $db_object_synonyms = $stored_synonyms.'|'.$db_object_synonyms;
  }

  $evidence_term = taxonomy_term_load($annotation->field_evidence_code[$lang][0]['tid']);
  $aspect_term = taxonomy_term_load($annotation->field_aspect[$lang][0]['tid']);
  $gene_type_term = taxonomy_term_load($gene->field_gene_type[$gene->language][0]['tid']);

  $columns = array(
    //col 1
    'planteome',
    //col 2
    $annotation->title,
    //col 3
    $gene_id,
    //col 4
    _annotation_gaf_first_value($annotation, 'field_qualifier'),
    //col 5
    get_ontology_id_from_term($annotation->field_ontology_id[$lang][0]['value']),
    //col 6
    implode('|', $reference_parts),
    //col 7
    $evidence_term->field_machine_name[$lang][0]['value'],
    //col 8
    implode('|', $with_parts),
    //col 9
    $aspect_term->field_machine_name[$lang][0]['value'],
    //col 10
    $gene->title,
    //col 11
    $db_object_synonyms,
    //col 12
    $gene_type_term->name,
    //col 13
    'taxon:'.$species->field_taxon_ncbi_id[$species->language][0]['value'],
    //col 14
    date('Ymd', $annotation->field_date[$lang][0]['value']),
    //col 15
    $annotation->field_assigned_by[$lang][0]['value'],
    //col 16
    _annotation_gaf_first_value($annotation, 'field_annotation_extension'),
    //col 17
    _annotation_gaf_first_value($annotation, 'field_gene_product_form_id'),
  );
  return implode("\t", $columns)."\n";
}
/**
 * Deletes the field_yaml_entity field-collection items attached to a term.
 *
 * Helper for update_sources(): empties the term's field_yaml_entity values
 * and deletes the referenced field collection items.
 */
function _update_sources_remove_yaml_entities($term) {
  $stale_item_ids = array();
  if (!empty($term->field_yaml_entity[LANGUAGE_NONE])) {
    foreach ($term->field_yaml_entity[LANGUAGE_NONE] as $item) {
      $stale_item_ids[] = $item['value'];
    }
    $term->field_yaml_entity[LANGUAGE_NONE] = array();
  }
  if (count($stale_item_ids) > 0) {
    entity_delete_multiple('field_collection_item', $stale_item_ids);
  }
}

/**
 * Creates one field_yaml_entity field-collection item per YAML entity.
 *
 * Helper for update_sources(): each created item is hosted on $term, gets the
 * optional entity_type_name / entity_id_syntax / url_syntax values that the
 * YAML entry provides, is saved, and is referenced from the term.
 */
function _update_sources_add_yaml_entities($term, array $yaml_entities) {
  // YAML key => field on the field collection item.
  $field_map = array(
    'entity_type_name' => 'field_yaml_entity_type_name',
    'entity_id_syntax' => 'field_yaml_entity_id_syntax',
    'url_syntax' => 'field_yaml_entity_url_syntax',
  );
  foreach ($yaml_entities as $yaml_entity) {
    $field_collection_item = entity_create('field_collection_item', array('field_name' => 'field_yaml_entity'));
    $field_collection_item->setHostEntity('taxonomy_term', $term);
    foreach ($field_map as $yaml_key => $field_name) {
      if (array_key_exists($yaml_key, $yaml_entity)) {
        $field_collection_item->{$field_name}[LANGUAGE_NONE][0]['value'] = $yaml_entity[$yaml_key];
      }
    }
    $field_collection_item->save();
    $term->field_yaml_entity[LANGUAGE_NONE][]['value'] = $field_collection_item->item_id;
  }
}

/**
 * Synchronises the source taxonomy terms with the parsed sources YAML.
 *
 * For every source returned by parse_sources_yaml() the term named
 * "<human_name> (<machine_name>)" is updated, or created in the 'sources'
 * vocabulary when taxonomy_get_term_by_name() finds none. The machine name
 * and the four no_* flags are copied onto the term. When the source's
 * no_yaml_entities flag is 0, an existing term's YAML entity items are
 * replaced and a new term's are created from the source's 'yaml_entities'.
 *
 * Fixes relative to the previous version: a non-array parse result is no
 * longer iterated; missing field_yaml_entity values or a missing
 * 'yaml_entities' key no longer trigger foreach warnings; the duplicated
 * field-collection code of both branches lives in two helpers.
 */
function update_sources(){
  //TODO 6 - Lakshit
  // - Investigate how we can validate the YAML data we're pulling; the
  //   is_array() test below is only a minimal sanity check.
  $source_yaml_array = parse_sources_yaml();
  if (!is_array($source_yaml_array)) {
    //here is where we would handle a YAML error
    return;
  }

  foreach ($source_yaml_array as $source) {
    $drupal_name = $source['human_name'].' ('.$source['machine_name'].')';
    $term_array = taxonomy_get_term_by_name($drupal_name);
    $is_new_term = (count($term_array) === 0);

    if ($is_new_term) {
      //TODO 1 - Jacob
      //Do we really need to add in new sources? Shouldn't they only be using sources from the yaml file sent to us?
      $sources_tree = taxonomy_vocabulary_machine_name_load('sources');
      $term = new stdClass();
      $term->name = $drupal_name;
      $term->vid = $sources_tree->vid;
      $term->field_flag_unknown_source_name[LANGUAGE_NONE][0]['value'] = 0;
    }
    else {
      $term = reset($term_array);
    }

    $term->field_machine_name[LANGUAGE_NONE][0]['value'] = $source['machine_name'];
    $term->field_flag_no_yaml_entities[LANGUAGE_NONE][0]['value'] = $source['no_yaml_entities'];
    $term->field_flag_no_entity_type_name[LANGUAGE_NONE][0]['value'] = $source['no_entity_type_name'];
    $term->field_flag_no_entity_id_syntax[LANGUAGE_NONE][0]['value'] = $source['no_entity_id_syntax'];
    $term->field_flag_no_url_syntax[LANGUAGE_NONE][0]['value'] = $source['no_url_syntax'];
    // Saved first so the term exists before field collection items are
    // hosted on it.
    taxonomy_term_save($term);

    if ($source['no_yaml_entities'] == 0) {
      if (!$is_new_term) {
        _update_sources_remove_yaml_entities($term);
      }
      $yaml_entities = (isset($source['yaml_entities']) && is_array($source['yaml_entities'])) ? $source['yaml_entities'] : array();
      _update_sources_add_yaml_entities($term, $yaml_entities);
    }
    taxonomy_term_save($term);
  }
}
//annotation batch processes
/**
 * Flattens a nested array into a single-level array.
 *
 * When the first argument is not an array, all arguments passed to the call
 * are treated as the input list. Nested arrays are flattened recursively and
 * combined with array_merge(), so string keys are kept (later values replace
 * earlier ones) while integer keys are renumbered.
 *
 * @param mixed $array
 *   The array to flatten, or the first of several scalar arguments.
 *
 * @return array
 *   The flattened array.
 */
function array_flatten($array = null) {
  $input = is_array($array) ? $array : func_get_args();
  $flat = array();
  foreach ($input as $key => $value) {
    $piece = is_array($value) ? array_flatten($value) : array($key => $value);
    $flat = array_merge($flat, $piece);
  }
  return $flat;
}
/**
 * Batch operation: creates one 'annotation' node per GAF row in a chunk.
 *
 * The chunk is first passed through parse_gaf_lines(). For every resulting
 * row the function derives a gene id from the row's symbol/synonym columns
 * using $id_syntax, looks up the taxon and gene nodes, fills the annotation
 * fields, attaches field collection items for the data source, DB references
 * and with/from entries, and saves the node.
 *
 * A row is skipped (continue) when no symbol matches the id pattern, when the
 * gene query returns nothing, or when no taxonomy term exists for the row's
 * evidence code or aspect.
 *
 * NOTE(review): three loop headers in this function (the DB reference loop,
 * the publication loop and the with/from loop) are truncated in this copy of
 * the file and do not parse; the statements that assigned $first, $parts and
 * $biblio_nid are missing. Restore them from version control before relying
 * on this function.
 *
 * @param array $chunk
 *   Raw GAF lines, handed to parse_gaf_lines().
 * @param string $id_syntax
 *   Gene id pattern: each '#' becomes \d in the generated regex, every other
 *   character becomes a two-character class of its upper- and lower-case form.
 * @param array $context
 *   Batch API context; not used in the visible code.
 */
function import_annotation_to_node($chunk, $id_syntax, &$context){
$chunk = parse_gaf_lines($chunk);
foreach($chunk as $data){
//$data contains an array representing the data from a GAF file for a given annotation
// Collect every candidate symbol (object symbol plus both synonym columns,
// each split on '|') into one comma-separated string to search in.
$possible_symbols = array();
array_push($possible_symbols,explode("|", $data['object_symbol']),explode("|", $data['synonyms1']),explode("|", $data['synonyms2']));
$possible_symbols = array_unique(array_flatten($possible_symbols));
$possible_symbols = implode(',',$possible_symbols);
// Translate $id_syntax into a regex, one character at a time.
// NOTE(review): characters are placed inside [...] without escaping, so a
// syntax containing ']', '\', '^', '-' or '/' would presumably break the pattern — confirm.
$regex = '/';
$strlen = strlen( $id_syntax );
for( $i = 0; $i < $strlen; $i++ ) {
$char = substr( $id_syntax, $i, 1 );
if($char == '#'){
$regex .= '\d';
} else {
$regex .= '['.strtoupper($char).strtolower($char).']';
}
}
$regex .= '/s';
//$id is the key
// NOTE(review): preg_match() fills $id with an array of matches; that array
// (not $id[0]) is what is later passed to the field_gene_id condition.
preg_match($regex, $possible_symbols, $id);
if(count($id)==0){
continue;
}
// The taxon number is the part after the last ':' of the taxon column.
$taxon_number = array_reverse(explode(':',$data['taxon_id']))[0];
$query = new EntityFieldQuery();
$query->entityCondition('entity_type','node')
->entityCondition('bundle','taxon')
->propertyCondition('type',array('taxon'))
->fieldCondition('field_taxon_ncbi_id', 'value', $taxon_number); //this will be updated when we change the context, and can likely be moved outside the batch process altogether
$results = $query->execute();
// NOTE(review): no check that a taxon node was found before indexing.
$species_nid = array_keys($results['node'])[0];
$query = new EntityFieldQuery();
$query->entityCondition('entity_type','node')
->entityCondition('bundle','gene')
->propertyCondition('type',array('gene'))
->fieldCondition('field_gene_id','value', $id)
->fieldCondition('field_ref_species', 'target_id', $species_nid, '=');
$results = $query->execute();
//the nid of the gene pointed to by the annotation
if(empty($results)){
continue;
}
$gene_nid = array_keys($results['node'])[0];
global $user;
$node = new stdClass();
//TODO 7 - Lakshit
// - Verify that this AXXXXXXXXXX number is being correctly tracked and autoincremented
$node->title = 'A'.format_annotation_accession();//TODO - track this like our internal accession
$node->type = "annotation";
node_object_prepare($node); // Sets some defaults. Invokes hook_prepare() and hook_node_prepare().
$node->language = LANGUAGE_NONE;
$node->uid = $user->uid;
$node->status = 1;
$node->promote = 0;
$node->comment = 1;
//ANNOTATION FIELDS are in the same order they are in the Drupal content type
//qualifier
$node->field_qualifier[$node->language][0]['value'] = $data['annotation_qualifier'];
//ontology ID
$node->field_ontology_id[$node->language][0]['value'] = $data['GO_ID'];
//date
//TODO 8 - Lakshit
// - Verify that the timestamp here is in UNIX format
$date = strtotime($data['date']);
$node->field_date[$node->language][0]['value'] = $date;
//assigned by
$node->field_assigned_by[$node->language][0]['value'] = $data['assigned_by'];
//annotation extension
$node->field_annotation_extension[$node->language][0]['value'] = $data['annotation_extension'];
//gene product form ID
$node->field_gene_product_form_id[$node->language][0]['value'] = $data['gene_product_form_id'];
//evidence code
// Looked up by machine name across all taxonomy terms (no vocabulary filter).
$query = new EntityFieldQuery();
$query->entityCondition('entity_type', 'taxonomy_term')
->fieldCondition('field_machine_name', 'value', $data['evidence'], '=');
$result = $query->execute();
if(!empty($result['taxonomy_term'])){
//term already exists
$term = reset($result['taxonomy_term']);
//echo'';var_dump($term->tid);exit;
$node->field_evidence_code[$node->language][0]['tid'] = $term->tid; //term id of the term which is in taxonomy
} else {
//term doesn't exist, this is wrong. :P
continue;
}
//aspect
$query = new EntityFieldQuery();
$query->entityCondition('entity_type', 'taxonomy_term')
->fieldCondition('field_machine_name', 'value', $data['aspect'], '=');
$result = $query->execute();
if(!empty($result['taxonomy_term'])){
//term already exists
$term = reset($result['taxonomy_term']);
$node->field_aspect[$node->language][0]['tid'] = $term->tid; //term id of the term which is in taxonomy
} else {
//term doesn't exist, this is wrong
continue;
}
//gene
$node->field_gene[$node->language][0]['target_id'] = $gene_nid;
//species
$node->field_ref_species[$node->language][0]['target_id'] = $species_nid;
//data source
$query = new EntityFieldQuery();
$query->entityCondition('entity_type', 'taxonomy_term')
->fieldCondition('field_machine_name', 'value', $data['source'], '=');
$result = $query->execute();
if(!empty($result['taxonomy_term'])) {
//term already exists
$term = reset($result['taxonomy_term']);
//term id of the term which is in taxonomy
} else {
//term doesn't exist, need to create it
// NOTE(review): $source is never defined in this function, so the four
// flag values below are read from an undefined variable.
$taxon_tree = taxonomy_vocabulary_machine_name_load('sources');
$taxon_vid = $taxon_tree->vid;
$term = new stdClass();
$term->name = $data['source'];
$term->field_flag_no_yaml_entities[LANGUAGE_NONE][0]['value'] = $source['no_yaml_entities'];
$term->field_flag_no_entity_type_name[LANGUAGE_NONE][0]['value'] = $source['no_entity_type_name'];
$term->field_flag_no_entity_id_syntax[LANGUAGE_NONE][0]['value'] = $source['no_entity_id_syntax'];
$term->field_flag_no_url_syntax[LANGUAGE_NONE][0]['value'] = $source['no_url_syntax'];
$term->vid = $taxon_vid;
taxonomy_term_save($term);
//term id of the term which is in taxonomy
}
// The data source is stored as a field collection item hosted on the node.
$field_collection_item = entity_create('field_collection_item', array('field_name' => 'field_data_source'));
$field_collection_item->setHostEntity('node', $node);
$field_collection_item->field_source_name[$node->language][0]['tid'] = $term->tid;
$field_collection_item->field_object_id[$node->language][0]['value'] = $data['object'];
$field_collection_item->save();
$node->field_data_source[$node->language][0]['value'] = $field_collection_item->item_id;
//DB Reference
$dbref_array = explode("|", $data['reference']);
// NOTE(review): the next line is truncated — the loop condition and the
// statements defining $first and $parts are missing from this copy.
for($x=0;$xentityCondition('entity_type', 'taxonomy_term')
->fieldCondition('field_machine_name', 'value', $first, '=');
$result = $query->execute();
if(!empty($result['taxonomy_term'])) {
//term already exists
$term = reset($result['taxonomy_term']);
//term id of the term which is in taxonomy
} else {
//term doesn't exist, need to create it
// NOTE(review): the new term is named after $data['source'] rather than the
// reference prefix looked up above, and $source is undefined here too.
$taxon_tree = taxonomy_vocabulary_machine_name_load('sources');
$taxon_vid = $taxon_tree->vid;
$term = new stdClass();
$term->name = $data['source'];
$term->field_flag_no_yaml_entities[LANGUAGE_NONE][0]['value'] = $source['no_yaml_entities'];
$term->field_flag_no_entity_type_name[LANGUAGE_NONE][0]['value'] = $source['no_entity_type_name'];
$term->field_flag_no_entity_id_syntax[LANGUAGE_NONE][0]['value'] = $source['no_entity_id_syntax'];
$term->field_flag_no_url_syntax[LANGUAGE_NONE][0]['value'] = $source['no_url_syntax'];
$term->vid = $taxon_vid;
taxonomy_term_save($term);
//term id of the term which is in taxonomy
}
$field_collection_item = entity_create('field_collection_item', array('field_name' => 'field_db_reference'));
$field_collection_item->setHostEntity('node', $node);
$field_collection_item->field_source_name[$node->language][$x]['tid'] = $term->tid;
$field_collection_item->field_object_id[$node->language][$x]['value'] = $parts;
$field_collection_item->save();
$node->field_db_reference[$node->language][$x]['value'] = $field_collection_item->item_id;
}
//field_publications
// generate_biblio_from_pmid(IN)
// echo'';var_dump($dbref_array);exit;
// NOTE(review): the next line is truncated — the loop condition and the code
// that produced $biblio_nid are missing; what remains is a debug dump and exit.
for($x=0;$x' ;var_dump(node_load($biblio_nid));exit;
$node->field_publication[$node->language][$x]['target_id'] = $biblio_nid;
// $node->field_publications[$node->language][$x]['entity'] = node_load($biblio_nid);
// $node->field_publications[$node->language][$x]['access'] = TRUE;
}
// echo'';var_dump('test');exit;
//With or From
if($data['withfrom'] != ''){
$withfrom_array = explode("|", $data['withfrom']);
// NOTE(review): the next line is truncated in the same way as the DB
// reference loop above ($first and $parts are not defined in what remains).
for($x=0;$xentityCondition('entity_type', 'taxonomy_term')
->fieldCondition('field_machine_name', 'value', $first, '=');
$result = $query->execute();
if(!empty($result['taxonomy_term'])) {
//term already exists
$term = reset($result['taxonomy_term']);
} else {
//term doesn't exist, need to create it
$taxon_tree = taxonomy_vocabulary_machine_name_load('sources');
$taxon_vid = $taxon_tree->vid;
$term = new stdClass();
$term->name = $data['source'];
$term->field_flag_no_yaml_entities[LANGUAGE_NONE][0]['value'] = $source['no_yaml_entities'];
$term->field_flag_no_entity_type_name[LANGUAGE_NONE][0]['value'] = $source['no_entity_type_name'];
$term->field_flag_no_entity_id_syntax[LANGUAGE_NONE][0]['value'] = $source['no_entity_id_syntax'];
$term->field_flag_no_url_syntax[LANGUAGE_NONE][0]['value'] = $source['no_url_syntax'];
$term->vid = $taxon_vid;
taxonomy_term_save($term);
//term id of the term which is in taxonomy
}
$field_collection_item = entity_create('field_collection_item', array('field_name' => 'field_with_or_from'));
$field_collection_item->setHostEntity('node', $node);
$field_collection_item->field_source_name[$node->language][$x]['tid'] = $term->tid;
$field_collection_item->field_object_id[$node->language][$x]['value'] = $parts;
$field_collection_item->save();
$node->field_with_or_from[$node->language][$x]['value'] = $field_collection_item->item_id;
}
}
//should be done
$node = node_submit($node);
node_save($node);
}
}
/**
 * Returns the nid of the biblio node for a PubMed id, creating it if needed.
 *
 * The record is fetched with biblio_pm_fetch_pmid(). An existing node is
 * reused when a duplicate is found by DOI or, failing that, by PubMed id.
 *
 * Fixes relative to the previous version: each duplicate check runs once
 * instead of twice; the DOI check is only attempted when
 * biblio_crossref_check_doi() is actually available (only biblio_pm was
 * verified before) and when the fetched record carries a DOI.
 * NOTE(review): skipping the DOI check for an empty DOI assumes an empty
 * value could otherwise match unrelated DOI-less records — confirm against
 * biblio_crossref_check_doi().
 *
 * @param string $pmid
 *   PubMed id to look up.
 *
 * @return mixed
 *   The node id of the new or already existing biblio node, or NULL when the
 *   biblio_pm module is missing, $pmid is empty, or nothing could be fetched.
 */
function generate_biblio_from_pmid($pmid){
  if (!module_exists('biblio_pm') || !strlen($pmid)) {
    return NULL;
  }
  $node = biblio_pm_fetch_pmid($pmid);
  if (empty($node)) {
    return NULL;
  }

  $dup = FALSE;
  if (!empty($node->biblio_doi) && function_exists('biblio_crossref_check_doi')) {
    $dup = biblio_crossref_check_doi($node->biblio_doi);
  }
  if (!$dup) {
    $dup = biblio_pm_check_pmid($node->biblio_pubmed_id);
  }
  if ($dup) {
    return $dup; //node id of already existing node
  }

  $node->comment = 0;
  $node = node_submit($node);
  node_save($node);
  return $node->nid; //node id of newly created node
}
/**
 * Returns the nid of the biblio node for a DOI, creating it via CrossRef.
 *
 * Nothing is returned when the biblio_crossref module is missing, the string
 * is empty or contains no '10.', no CrossRef pid is configured, or the
 * CrossRef client fetches nothing.
 *
 * @param string $string
 *   The DOI to look up.
 *
 * @return mixed
 *   Node id of the newly created node, or the value reported by
 *   biblio_crossref_check_doi() for an already existing one.
 */
function generate_biblio_from_doi($string){
  if (!module_exists('biblio_crossref')) {
    return;
  }
  global $user;
  $doi = $string;
  if (!strlen($doi) || strpos($doi, '10.') === FALSE) {
    return;
  }

  // The site-wide pid is the default; a per-user pid overrides it when the
  // profile form setting is on.
  $crossref_pid = variable_get('biblio_crossref_pid', '');
  $user_pid = !empty($user->data['biblio_crossref_pid']) ? $user->data['biblio_crossref_pid'] : '';
  if (variable_get('biblio_show_crossref_profile_form', '1') && !empty($user_pid)) {
    $crossref_pid = $user_pid;
  }
  if (empty($crossref_pid)) {
    return;
  }

  module_load_include('php', 'biblio_crossref', 'biblio.crossref.client');
  $client = new BiblioCrossRefClient($doi, $crossref_pid);
  $fetched = $client->fetch();
  if (empty($fetched)) {
    return;
  }

  $existing_nid = biblio_crossref_check_doi($doi);
  if ($existing_nid) {
    return $existing_nid; //node id of already existing node
  }
  $fetched->comment = 0;
  $fetched = node_submit($fetched);
  node_save($fetched);
  return $fetched->nid; //node id of newly created node
}
/**
 * Creates a placeholder biblio node for a reference that is not a PMID/DOI.
 *
 * The reference string is used both as the node title and as the
 * biblio_other_number value.
 *
 * @param string $string
 *   The reference identifier.
 *
 * @return int
 *   Node id of the saved biblio node.
 */
function generate_biblio_from_other($string){
  global $user;
  $biblio = new stdClass();
  $biblio->title = $string;
  $biblio->type = 'biblio';
  // Needs the type set; applies node defaults before the overrides below.
  node_object_prepare($biblio);

  $properties = array(
    'language' => LANGUAGE_NONE,
    'uid' => $user->uid,
    'status' => 1,
    'promote' => 0,
    'comment' => 1,
    'biblio_type' => "102", //journal article
    'biblio_year' => "Submitted", //default value for years
    'biblio_other_number' => $string,
  );
  foreach ($properties as $property => $value) {
    $biblio->{$property} = $value;
  }

  $biblio = node_submit($biblio);
  node_save($biblio);
  return $biblio->nid;
}
/**
 * Batch 'finished' callback for the GAF import batch.
 *
 * Uses the Drupal message queue rather than printing raw output, so the text
 * is rendered on the page shown after the batch.
 *
 * @param bool $success
 *   TRUE when every operation ran without a fatal error.
 * @param array $results
 *   Results gathered by the operations (unused).
 * @param array $operations
 *   Operations that were left unprocessed when the batch failed.
 */
function gaf_process_finished($success, $results, $operations) {
  if ($success) {
    drupal_set_message(t('Finished importing!'));
  }
  else {
    drupal_set_message(t('The import did not complete; @count operation(s) were not processed.', array('@count' => count($operations))), 'error');
  }
}
//gene batch processes
//Inserts aliases from the alias file into a temp table in order to build the synonyms
/**
 * Batch operation: copies alias rows into the temporary 'tmptable'.
 *
 * Rows without a locus name or symbol are skipped and logged. Previously such
 * a row triggered a debug var_dump() followed by exit(), which aborted the
 * whole batch request.
 *
 * @param array $chunk2
 *   Alias rows with the keys 'locus_name', 'symbol' and 'full_name'.
 * @param array $context
 *   Batch API context; not used by this operation.
 */
function insert_gene_aliases($chunk2, &$context){
  foreach ($chunk2 as $data) {
    $locus_name = isset($data['locus_name']) ? $data['locus_name'] : '';
    $symbol = isset($data['symbol']) ? $data['symbol'] : '';
    if ($locus_name == '' || $symbol == '') {
      watchdog('gene_import', 'Skipped alias row without locus name or symbol: @row', array('@row' => print_r($data, TRUE)), WATCHDOG_WARNING);
      continue;
    }
    db_insert('tmptable')
      ->fields(array(
        'gene_stable_id' => $locus_name,
        'gene_symbol' => $symbol,
        'gene_name' => isset($data['full_name']) ? $data['full_name'] : NULL,
      ))
      ->execute();
  }
}
//Main callback to import gene data from file.
/**
 * Batch operation: creates one 'gene' node per row of a gene file chunk.
 *
 * For each row the aliases stored in 'tmptable' for the gene's stable id are
 * loaded; the first alias supplies the gene symbol and name, the remaining
 * ones become synonyms. Without aliases the stable id is used as the symbol
 * and 'Known Gene <id>' as the name.
 *
 * Fixes relative to the previous version:
 * - the synonym-writing loop had an unparsable header; it is now a foreach;
 * - alias de-duplication compared a non-existent 'name' column, which threw
 *   away every alias after the first; rows are now de-duplicated on the
 *   (gene_symbol, gene_name) pair. NOTE(review): the intended key is not
 *   visible in this file — confirm;
 * - the loop no longer reuses $results as its own iteration variable;
 * - a missing source term is created under $source_name (falling back to
 *   'Gramene' only when no name was given) and resolved once per chunk;
 *   before, the term was looked up by $source_name but created as 'Gramene',
 *   so the lookup kept failing and a new term was created for every gene;
 * - empty synonym values are not written to field_synonyms.
 *
 * @param array $chunk1
 *   Rows keyed by the gene file headers ('Gene stable ID', 'Gene description',
 *   'Chromosome', 'Gene start (bp)', 'Gene end (bp)', 'Gene type').
 * @param int $species_nid
 *   Node id of the taxon node the genes belong to.
 * @param string $source_name
 *   Name of the data source taxonomy term.
 * @param array $context
 *   Batch API context; not used by this operation.
 */
function import_gene_data_to_node($chunk1, $species_nid, $source_name, &$context){
  global $user;
  // Resolved lazily on the first row so an empty chunk creates no term.
  $source_term = NULL;

  foreach ($chunk1 as $data) {
    $stable_id = $data['Gene stable ID'];

    //every match from the alias file for this gene
    $alias_rows = db_select('tmptable', 'tmp')
      ->fields('tmp')
      ->condition('gene_stable_id', $stable_id, '=')
      ->execute()
      ->fetchAll();
    $seen = array();
    $aliases = array();
    foreach ($alias_rows as $alias_row) {
      $fingerprint = $alias_row->gene_symbol."\t".$alias_row->gene_name;
      if (!isset($seen[$fingerprint])) {
        $seen[$fingerprint] = TRUE;
        $aliases[] = $alias_row;
      }
    }

    if (empty($aliases)) {
      $gene_name = 'Known Gene '.$stable_id;
      $gene_symbol = $stable_id;
    }
    else {
      // The first alias names the gene; the rest are synonyms.
      $primary = array_shift($aliases);
      $gene_name = $primary->gene_name;
      $gene_symbol = $primary->gene_symbol;
    }

    $node = new stdClass();
    $node->title = $gene_symbol.' '.$gene_name;
    $node->type = "gene";
    node_object_prepare($node);
    $node->language = LANGUAGE_NONE;
    $node->uid = $user->uid;
    $node->status = 1;
    $node->promote = 0;
    $node->comment = 1;
    //description
    $node->body[$node->language][0]['value'] = $data['Gene description'];
    $node->body[$node->language][0]['summary'] = text_summary($data['Gene description']);
    $node->body[$node->language][0]['format'] = 'full_html';
    //accession
    $node->field_accession[$node->language][0]['value'] = 'P'.format_accession();
    //symbol
    $node->field_gene_symbol[$node->language][0]['value'] = $gene_symbol;
    //name
    $node->field_gene_name[$node->language][0]['value'] = $gene_name;
    //synonyms - symbol and name of every remaining alias
    $synonyms = array();
    foreach ($aliases as $alias) {
      foreach (array($alias->gene_symbol, $alias->gene_name) as $candidate) {
        if ($candidate !== NULL && $candidate !== '') {
          $synonyms[] = $candidate;
        }
      }
    }
    foreach ($synonyms as $delta => $synonym) {
      $node->field_synonyms[$node->language][$delta]['value'] = $synonym;
    }
    //gene ID
    $node->field_gene_id[$node->language][0]['value'] = $stable_id;
    //Chromosome fields
    $node->field_chromosome_no[$node->language][0]['value'] = $data['Chromosome'];
    $node->field_chromosome_start[$node->language][0]['value'] = (int)$data['Gene start (bp)'];
    $node->field_chromosome_stop[$node->language][0]['value'] = (int)$data['Gene end (bp)'];
    //species nid passed as variable
    $node->field_ref_species[$node->language][0]['target_id'] = $species_nid;

    //gene type - reuse the term when it exists, otherwise create it
    $type_terms = taxonomy_get_term_by_name($data['Gene type']);
    if (count($type_terms) !== 0) {
      $type_term = reset($type_terms);
    }
    else {
      $type_vocabulary = taxonomy_vocabulary_machine_name_load('gene_type');
      $type_term = new stdClass();
      $type_term->name = $data['Gene type'];
      $type_term->vid = $type_vocabulary->vid;
      taxonomy_term_save($type_term);
    }
    $node->field_gene_type[$node->language][0]['tid'] = $type_term->tid;

    //data source
    if ($source_term === NULL) {
      $source_terms = taxonomy_get_term_by_name($source_name);
      if (count($source_terms) !== 0) {
        $source_term = reset($source_terms);
      }
      else {
        $source_vocabulary = taxonomy_vocabulary_machine_name_load('sources');
        $source_term = new stdClass();
        $source_term->name = ($source_name !== NULL && $source_name !== '') ? $source_name : 'Gramene';
        $source_term->vid = $source_vocabulary->vid;
        taxonomy_term_save($source_term);
      }
    }
    $field_collection_item = entity_create('field_collection_item', array('field_name' => 'field_data_source'));
    $field_collection_item->setHostEntity('node', $node);
    $field_collection_item->field_source_name[$node->language][0]['tid'] = $source_term->tid;
    $field_collection_item->field_object_id[$node->language][0]['value'] = $stable_id;
    $field_collection_item->save();
    $node->field_data_source[$node->language][0]['value'] = $field_collection_item->item_id;

    $node = node_submit($node);
    node_save($node);
  }
}
/**
 * Hands out the current gene accession number and advances the counter.
 *
 * Reads the 'accession' column from the {accession} table, writes back that
 * value plus one, and returns the value that was read, left-padded with
 * zeros to a minimum width of 12 characters.
 *
 * NOTE(review): the read and the write are two separate statements, so two
 * imports running at the same time could receive the same number - confirm
 * whether callers can ever run concurrently.
 *
 * @return string
 *   The accession number that was stored before the increment, zero-padded.
 */
function format_accession(){
  $row = db_select('accession', 'acc')
    ->fields('acc')
    ->execute()
    ->fetchAssoc();
  $current = $row['accession'];

  // Store the next value so the following call returns a fresh number.
  db_update('accession')
    ->fields(array('accession' => $current + 1))
    ->execute();

  return str_pad((string) $current, 12, '0', STR_PAD_LEFT);
}
/**
 * Hands out the current annotation accession number and advances the counter.
 *
 * Reads the 'accession' column from the {annotation_accession} table, writes
 * back that value plus one, and returns the value that was read, left-padded
 * with zeros to a minimum width of 12 characters.
 *
 * NOTE(review): the read and the write are two separate statements, so two
 * imports running at the same time could receive the same number - confirm
 * whether callers can ever run concurrently.
 *
 * @return string
 *   The accession number that was stored before the increment, zero-padded.
 */
function format_annotation_accession(){
  $row = db_select('annotation_accession', 'ann_acc')
    ->fields('ann_acc')
    ->execute()
    ->fetchAssoc();
  $current = $row['accession'];

  // Store the next value so the following call returns a fresh number.
  db_update('annotation_accession')
    ->fields(array('accession' => $current + 1))
    ->execute();

  return str_pad((string) $current, 12, '0', STR_PAD_LEFT);
}
/**
 * Reports the outcome of an import run to the user.
 *
 * The signature matches a Drupal Batch API 'finished' callback; presumably it
 * is registered as one - confirm against the batch definition.
 *
 * The outcome is queued with drupal_set_message() so it is shown on the next
 * rendered page instead of being written straight into the response body.
 *
 * @param $success
 *   Truthy when the run completed without errors.
 * @param $results
 *   Not used here.
 * @param $operations
 *   Not used here.
 */
function biomart_process_finished($success, $results, $operations) {
  if ($success) {
    drupal_set_message(t('Finished importing!'));
  }
  else {
    drupal_set_message(t('The import did not complete successfully.'), 'error');
  }
}
/**
 * Stub: the body is empty, so every call returns NULL.
 *
 * Presumably intended to validate a database cross-reference string (going by
 * the name) - TODO confirm and implement, or remove if unused.
 *
 * @param $db_xref_string
 *   Not read by the current implementation.
 */
function validate_database_reference($db_xref_string){
  // No validation logic is implemented here.
}
/**
 * Parses tab-separated text with CRLF line endings into a list of rows.
 *
 * The first line supplies the column names. Every following non-empty line
 * becomes one associative array keyed by those names. Empty lines (for
 * example the one produced by a trailing line break) are skipped, and a
 * column that is missing from a short row is filled with NULL.
 *
 * @param $content
 *   The file contents; lines must be separated by "\r\n".
 *
 * @return array
 *   One associative array per data row, in file order.
 */
function tabbed_file_to_array($content){
  $lines = explode("\r\n", $content);
  $fields = explode("\t", array_shift($lines));
  $field_count = count($fields);
  $ret_arr = array();
  foreach ($lines as $line) {
    if ($line === '') {
      // Nothing to map; avoids emitting a row made only of empty values.
      continue;
    }
    $values = explode("\t", $line);
    $row = array();
    for ($x = 0; $x < $field_count; $x++) {
      $row[$fields[$x]] = isset($values[$x]) ? $values[$x] : NULL;
    }
    $ret_arr[] = $row;
  }
  return $ret_arr;
}
/**
 * Parses tab-separated text split on "\n" into a list of rows.
 *
 * The first line supplies the column names. Every following non-empty line
 * becomes one associative array keyed by those names. A carriage return left
 * at the end of a line (CRLF input) is stripped so it does not end up in the
 * last column name or value. Empty lines are skipped, and a column that is
 * missing from a short row is filled with NULL.
 *
 * @param $content
 *   The file contents; lines are separated by "\n" (optionally "\r\n").
 *
 * @return array
 *   One associative array per data row, in file order.
 */
function tabbed_file_to_array_alt($content){
  $lines = explode("\n", $content);
  $fields = explode("\t", rtrim(array_shift($lines), "\r"));
  $field_count = count($fields);
  $ret_arr = array();
  foreach ($lines as $line) {
    $line = rtrim($line, "\r");
    if ($line === '') {
      // Nothing to map; avoids emitting a row made only of empty values.
      continue;
    }
    $values = explode("\t", $line);
    $row = array();
    for ($x = 0; $x < $field_count; $x++) {
      $row[$fields[$x]] = isset($values[$x]) ? $values[$x] : NULL;
    }
    $ret_arr[] = $row;
  }
  return $ret_arr;
}
/**
 * Menu callback for 'autocomplete/genes': suggests gene nodes by gene ID.
 *
 * Searches 'gene' nodes whose field_gene_id contains the typed text and
 * prints a JSON object mapping each node title to its escaped title. The
 * search only runs for input longer than 6 characters; shorter input gets an
 * empty JSON response so the client always receives valid JSON.
 *
 * @param $string
 *   The text typed so far.
 */
function autocomplete_genes($string = ''){
  $matches = array();
  if (strlen($string) > 6) {
    $query = new EntityFieldQuery();
    $query->entityCondition('entity_type', 'node')
      ->entityCondition('bundle', 'gene')
      ->propertyCondition('type', array('gene'))
      // db_like() keeps '%' and '_' typed by the user from acting as wildcards.
      ->fieldCondition('field_gene_id', 'value', '%' . db_like($string) . '%', 'LIKE');
    $results = $query->execute();
    // The 'node' key is absent when nothing matched.
    if (!empty($results['node'])) {
      foreach (node_load_multiple(array_keys($results['node'])) as $node) {
        $matches[$node->title] = check_plain($node->title);
      }
    }
  }
  drupal_json_output($matches);
}
/**
 * Menu callback for 'autocomplete/aspect': suggests terms by name prefix.
 *
 * Looks up at most 10 terms of the 'aspect' vocabulary whose name starts with
 * the typed text and prints them as a JSON object mapping term name to
 * escaped term name. Prints an empty JSON response when the vocabulary does
 * not exist.
 *
 * @param $string
 *   The text typed so far.
 */
function autocomplete_aspect($string = ''){
  $matches = array();
  $vocab = taxonomy_vocabulary_machine_name_load('aspect');
  if ($vocab) {
    $result = db_select('taxonomy_term_data', 't')
      ->fields('t', array('tid', 'name'))
      ->condition('vid', $vocab->vid, '=')
      // db_like() keeps '%' and '_' typed by the user from acting as wildcards.
      ->condition('name', db_like($string) . '%', 'LIKE')
      ->range(0, 10)
      ->execute();
    foreach ($result as $term) {
      $matches[$term->name] = check_plain($term->name);
    }
  }
  drupal_json_output($matches);
}
/**
 * Menu callback for 'autocomplete/evidence_code': suggests terms by prefix.
 *
 * Looks up at most 10 terms of the 'evidence_code' vocabulary whose name
 * starts with the typed text and prints them as a JSON object mapping term
 * name to escaped term name. Prints an empty JSON response when the
 * vocabulary does not exist.
 *
 * @param $string
 *   The text typed so far.
 */
function autocomplete_evidence_code($string = ''){
  $matches = array();
  $vocab = taxonomy_vocabulary_machine_name_load('evidence_code');
  if ($vocab) {
    $result = db_select('taxonomy_term_data', 't')
      ->fields('t', array('tid', 'name'))
      ->condition('vid', $vocab->vid, '=')
      // db_like() keeps '%' and '_' typed by the user from acting as wildcards.
      ->condition('name', db_like($string) . '%', 'LIKE')
      ->range(0, 10)
      ->execute();
    foreach ($result as $term) {
      $matches[$term->name] = check_plain($term->name);
    }
  }
  drupal_json_output($matches);
}
/**
 * Menu callback for 'autocomplete/sources': suggests terms by name prefix.
 *
 * Looks up at most 10 terms of the 'sources' vocabulary whose name starts
 * with the typed text and prints them as a JSON object mapping term name to
 * escaped term name. Prints an empty JSON response when the vocabulary does
 * not exist.
 *
 * @param $string
 *   The text typed so far.
 */
function autocomplete_sources($string = ''){
  $matches = array();
  $vocab = taxonomy_vocabulary_machine_name_load('sources');
  if ($vocab) {
    $result = db_select('taxonomy_term_data', 't')
      ->fields('t', array('tid', 'name'))
      ->condition('vid', $vocab->vid, '=')
      // db_like() keeps '%' and '_' typed by the user from acting as wildcards.
      ->condition('name', db_like($string) . '%', 'LIKE')
      ->range(0, 10)
      ->execute();
    foreach ($result as $term) {
      $matches[$term->name] = check_plain($term->name);
    }
  }
  drupal_json_output($matches);
}
/**
 * Menu callback for 'autocomplete/species': suggests taxon nodes by title.
 *
 * Searches 'taxon' nodes whose title contains the typed text and whose
 * field_taxon_rank references the first term named 'species' or the first
 * term named 'subspecies'. Prints a JSON object mapping node title to escaped
 * node title; the response is empty when neither rank term exists or nothing
 * matches.
 *
 * @param $string
 *   The text typed so far.
 */
function autocomplete_species($string = ''){
  $matches = array();

  // Collect the rank term ids that actually exist.
  $rank_tids = array();
  foreach (array('species', 'subspecies') as $rank_name) {
    $rank_terms = taxonomy_get_term_by_name($rank_name);
    if (!empty($rank_terms)) {
      $rank_term = reset($rank_terms);
      $rank_tids[] = $rank_term->tid;
    }
  }

  if (!empty($rank_tids)) {
    $query = new EntityFieldQuery();
    $query->entityCondition('entity_type', 'node')
      ->entityCondition('bundle', 'taxon')
      ->propertyCondition('type', array('taxon'))
      // db_like() keeps '%' and '_' typed by the user from acting as wildcards.
      ->propertyCondition('title', '%' . db_like($string) . '%', 'LIKE')
      ->fieldCondition('field_taxon_rank', 'tid', $rank_tids);
    $results = $query->execute();
    // The 'node' key is absent when nothing matched.
    if (!empty($results['node'])) {
      foreach (node_load_multiple(array_keys($results['node'])) as $node) {
        $matches[$node->title] = check_plain($node->title);
      }
    }
  }

  drupal_json_output($matches);
}
/**
 * Menu callback for 'autocomplete/ontology_term'.
 *
 * Forwards the typed text to the Planteome ontology autocomplete endpoint and
 * prints a JSON object mapping each returned term id to its escaped label.
 * Prints an empty JSON response when the request fails or the reply is not
 * the expected JSON structure.
 *
 * @param $string
 *   The text typed so far.
 */
function autocomplete_ontology_term($string = ''){
  //use http://browser.planteome.org/api/autocomplete/ontology?q=STRING
  $matches = array();
  $response = curl_get_contents("http://browser.planteome.org/api/autocomplete/ontology?q=".url_encode_2($string));
  // curl_exec() yields FALSE on failure; only decode an actual response body.
  $json_obj = is_string($response) ? json_decode($response) : NULL;
  if (is_object($json_obj) && !empty($json_obj->data) && is_array($json_obj->data)) {
    foreach ($json_obj->data as $match) {
      if (!isset($match->id, $match->annotation_class_label)) {
        continue;
      }
      // use ->id to pass GOID
      $matches[$match->id] = check_plain($match->annotation_class_label);
    }
  }
  drupal_json_output($matches);
}
/**
 * Menu callback for 'autocomplete/ontology_term_extended'.
 *
 * Forwards the typed text to the Planteome ontology autocomplete endpoint and
 * prints a JSON object mapping each returned term id to the escaped text
 * "label (id)". Prints an empty JSON response when the request fails or the
 * reply is not the expected JSON structure.
 *
 * @param $string
 *   The text typed so far.
 */
function autocomplete_ontology_term_extended($string = ''){
  //use http://browser.planteome.org/api/autocomplete/ontology?q=STRING
  $matches = array();
  $response = curl_get_contents("http://browser.planteome.org/api/autocomplete/ontology?q=".url_encode_2($string));
  // curl_exec() yields FALSE on failure; only decode an actual response body.
  $json_obj = is_string($response) ? json_decode($response) : NULL;
  if (is_object($json_obj) && !empty($json_obj->data) && is_array($json_obj->data)) {
    foreach ($json_obj->data as $match) {
      if (!isset($match->id, $match->annotation_class_label)) {
        continue;
      }
      // use ->id to pass GOID
      $matches[$match->id] = check_plain($match->annotation_class_label.' ('.$match->id.')');
    }
  }
  drupal_json_output($matches);
}
/**
 * Resolves an ontology term id to its label via the Planteome endpoint.
 *
 * Queries the ontology autocomplete endpoint with the id and scans the
 * returned entries for one whose id equals the input. When several entries
 * match, the label of the last one is used.
 *
 * @param $id_string
 *   The ontology term id to look up.
 *
 * @return
 *   The matching label, or $id_string unchanged when the request fails, the
 *   reply cannot be decoded, or no entry matches.
 */
function get_ontology_term_from_id($id_string) {
  $output = $id_string;
  $response = curl_get_contents("http://browser.planteome.org/api/autocomplete/ontology?q=".url_encode_2($id_string));
  // curl_exec() yields FALSE on failure; only decode an actual response body.
  $json_obj = is_string($response) ? json_decode($response) : NULL;
  if (is_object($json_obj) && !empty($json_obj->data) && is_array($json_obj->data)) {
    foreach ($json_obj->data as $data) {
      if (isset($data->id, $data->annotation_class_label) && $data->id == $id_string) {
        $output = $data->annotation_class_label;
      }
    }
  }
  return $output;
}
/**
 * Resolves an ontology term label to its id via the Planteome endpoint.
 *
 * Queries the ontology autocomplete endpoint with the label and scans the
 * returned entries for one whose label equals the input. When several entries
 * match, the id of the last one is used.
 *
 * @param $term_string
 *   The ontology term label to look up.
 *
 * @return
 *   The matching id, or $term_string unchanged when the request fails, the
 *   reply cannot be decoded, or no entry matches.
 */
function get_ontology_id_from_term($term_string) {
  $output = $term_string;
  $response = curl_get_contents("http://browser.planteome.org/api/autocomplete/ontology?q=".url_encode_2($term_string));
  // curl_exec() yields FALSE on failure; only decode an actual response body.
  $json_obj = is_string($response) ? json_decode($response) : NULL;
  if (is_object($json_obj) && !empty($json_obj->data) && is_array($json_obj->data)) {
    foreach ($json_obj->data as $data) {
      if (isset($data->id, $data->annotation_class_label) && $data->annotation_class_label == $term_string) {
        $output = $data->id;
      }
    }
  }
  return $output;
}
/**
 * get_children($taxon_id, $remaining_entities).
 *
 * Builds the nested subtree rooted at the entity whose 'id' equals $taxon_id.
 * The first entity with that id is copied, and for every entity whose
 * 'parent' equals that id the function recurses and appends the resulting
 * subtree to the copy's 'children' list.
 *
 * @param taxon_id
 *   The id of the entity to use as the root of the subtree.
 * @param remaining_entities
 *   Flat array of entity arrays, each with at least 'id' and 'parent'.
 *   The full list is rescanned at every level of the recursion.
 *
 * @return
 *   The root entity array, with a 'children' key when it has descendants, or
 *   NULL when no entity has the requested id.
 */
function get_children($taxon_id, $remaining_entities){
  foreach ($remaining_entities as $candidate) {
    if ($candidate['id'] != $taxon_id) {
      continue;
    }
    $subtree = $candidate;
    foreach ($remaining_entities as $other) {
      if ($other['parent'] == $subtree['id']) {
        $subtree['children'][] = get_children($other['id'], $remaining_entities);
      }
    }
    return $subtree;
  }
  return NULL;
}
/**
 * import_owl_data($entity).
 *
 * Creates one node from an entity array, unless a node carrying the same
 * field_taxon_ncbi_id value is already found. The node gets the entity label
 * as title, the NCBI id, a rank term (created on demand), a reference to the
 * parent node when one is found, and the related/exact synonym lists.
 *
 * @param $entity
 *   An array with the following indexes:
 *     [id] => int
 *     [rank] => string
 *     [label] => string
 *     [parent] => int
 *     [synonyms][related] => string array
 *     [synonyms][exact] => string array
 *   A synonym list whose first element is 'NO RELATED' / 'NO EXACT' is
 *   treated as empty.
 */
function import_owl_data($entity){
  // Skip entities for which a node with this NCBITaxonID already exists.
  $query = new EntityFieldQuery();
  $query->entityCondition('entity_type', 'node')
    ->entityCondition('bundle', 'taxon')
    ->propertyCondition('type', array('taxon'))
    ->fieldCondition('field_taxon_ncbi_id', 'value', $entity['id']);
  $results = $query->execute();
  if (!empty($results)) {
    return;
  }

  global $user;
  $node = new stdClass();
  $node->title = $entity['label'];
  // NOTE(review): the node is created as type "taxonomy" (and the parent
  // lookup below searches bundle "taxonomy"), while the duplicate check above
  // searches bundle "taxon" - confirm which content type is intended.
  $node->type = "taxonomy";
  node_object_prepare($node);
  $node->language = LANGUAGE_NONE;
  $node->uid = $user->uid;
  $node->status = 1;
  $node->promote = 0;
  $node->comment = 1;
  $node->field_taxon_ncbi_id[LANGUAGE_NONE][0]['value'] = $entity['id'];

  // Rank: reuse the first term with this name, otherwise create it in the
  // 'taxonomy' vocabulary. Both paths attach the term to field_taxon_rank.
  $term_array = taxonomy_get_term_by_name($entity['rank']);
  if (count($term_array) !== 0) {
    $rank_term = reset($term_array);
    $rank_tid = $rank_term->tid;
  }
  else {
    $taxon_tree = taxonomy_vocabulary_machine_name_load('taxonomy');
    $rank_tid = custom_create_taxonomy_term($entity['rank'], $taxon_tree->vid);
  }
  $node->field_taxon_rank[LANGUAGE_NONE][0]['tid'] = $rank_tid;

  // Parent: entity id 1 is the root and has no parent to link.
  if ($entity['id'] != 1) {
    $query = new EntityFieldQuery();
    $query->entityCondition('entity_type', 'node')
      ->entityCondition('bundle', 'taxonomy')
      ->propertyCondition('type', array('taxonomy'))
      ->fieldCondition('field_taxon_ncbi_id', 'value', $entity['parent']);
    $results = $query->execute();
    // The 'node' key is absent when no parent node was found.
    if (!empty($results['node'])) {
      $parent_nids = array_keys($results['node']);
      $node->field_taxon_parent[LANGUAGE_NONE][0] = array(
        'target_id' => $parent_nids[0],
        'target_type' => 'node',
      );
    }
  }

  // Synonyms: tolerate missing or empty lists.
  $related = isset($entity['synonyms']['related']) ? (array) $entity['synonyms']['related'] : array();
  if (!empty($related) && reset($related) != 'NO RELATED') {
    foreach ($related as $synonym) {
      $node->field_taxon_related_synonyms[LANGUAGE_NONE][]['value'] = $synonym;
    }
  }
  $exact = isset($entity['synonyms']['exact']) ? (array) $entity['synonyms']['exact'] : array();
  if (!empty($exact) && reset($exact) != 'NO EXACT') {
    foreach ($exact as $synonym) {
      $node->field_taxon_exact_synonyms[LANGUAGE_NONE][]['value'] = $synonym;
    }
  }

  $node = node_submit($node);
  node_save($node);
}
/**
 * Saves a new taxonomy term and returns its term id.
 *
 * @param $name
 *   Name for the new term.
 * @param $vid
 *   Id of the vocabulary the term is saved into.
 *
 * @return
 *   The 'tid' property of the term object after taxonomy_term_save().
 */
function custom_create_taxonomy_term($name, $vid) {
  $new_term = (object) array(
    'name' => $name,
    'vid' => $vid,
  );
  taxonomy_term_save($new_term);
  return $new_term->tid;
}
/**
 * Imports an entity tree depth-first, parent before children.
 *
 * Passes the given entity to import_owl_data(), then recurses into each
 * element of its 'children' key when that key is present.
 *
 * @param $array
 *   An entity array, optionally holding nested entities under 'children'.
 */
function iterate($array){
  import_owl_data($array);
  if (!array_key_exists('children', $array)) {
    return;
  }
  foreach ($array['children'] as $subtree) {
    iterate($subtree);
  }
}
/**
 * Builds an options array from every term of a vocabulary.
 *
 * @param $string
 *   Machine name of the vocabulary.
 *
 * @return array
 *   An empty-string entry at key 0, followed by term names keyed by term id
 *   in the order returned by taxonomy_get_tree().
 */
function taxonomy_to_select_array($string) {
  $vocabulary = taxonomy_vocabulary_machine_name_load($string);
  // Key 0 holds the blank choice.
  $options = array('');
  foreach (taxonomy_get_tree($vocabulary->vid) as $term) {
    $options[$term->tid] = $term->name;
  }
  return $options;
}
/**
 * URL-encodes a string, then turns a fixed set of escapes back into literals.
 *
 * The string is passed through urlencode(); afterwards the percent-escapes
 * listed below are replaced by their literal characters, one pair after the
 * other in the listed order. Because '%25' is restored before '%23', '%5B'
 * and '%5D', the order of the entries affects the result and must be kept.
 *
 * @param $string
 *   The text to encode.
 *
 * @return string
 *   The encoded text with the listed characters left literal.
 */
function url_encode_2($string) {
  $restore = array(
    '%21' => '!',
    '%2A' => '*',
    '%27' => "'",
    '%28' => "(",
    '%29' => ")",
    '%3B' => ";",
    '%3A' => ":",
    '%40' => "@",
    '%26' => "&",
    '%3D' => "=",
    '%2B' => "+",
    '%24' => "$",
    '%2C' => ",",
    '%2F' => "/",
    '%3F' => "?",
    '%25' => "%",
    '%23' => "#",
    '%5B' => "[",
    '%5D' => "]",
  );
  $encoded = urlencode($string);
  return str_replace(array_keys($restore), array_values($restore), $encoded);
}
/**
 * Returns the text between the first $start and the next $end after it.
 *
 * @param $string
 *   The text to search.
 * @param $start
 *   Delimiter that precedes the wanted text.
 * @param $end
 *   Delimiter that follows the wanted text; searched for only after $start.
 *
 * @return string
 *   The text between the delimiters, or '' when either delimiter is empty or
 *   cannot be found.
 */
function get_string_between($string, $start, $end){
  $start = (string) $start;
  $end = (string) $end;
  if ($start === '' || $end === '') {
    return '';
  }
  $start_pos = strpos($string, $start);
  // Strict comparison: a match at offset 0 is a valid hit, FALSE is a miss.
  if ($start_pos === FALSE) {
    return '';
  }
  $content_pos = $start_pos + strlen($start);
  $end_pos = strpos($string, $end, $content_pos);
  if ($end_pos === FALSE) {
    // Without a closing delimiter there is no well-defined span to return.
    return '';
  }
  return substr($string, $content_pos, $end_pos - $content_pos);
}
/**
 * Fetches a URL with cURL and returns the response body.
 *
 * Response headers are excluded from the returned data. No timeout or other
 * options are set beyond the three listed below.
 *
 * @param $url
 *   The address to request.
 *
 * @return
 *   Whatever curl_exec() returns: the body as a string, or FALSE on failure.
 */
function curl_get_contents($url){
  $handle = curl_init();
  curl_setopt_array($handle, array(
    CURLOPT_HEADER => 0,
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_URL => $url,
  ));
  $response = curl_exec($handle);
  curl_close($handle);
  return $response;
}
/**
 * Adds a "Clone" submit button to the node form of 'annotation' nodes.
 *
 * The function name follows the hook_form_BASE_FORM_ID_alter() pattern for
 * the node form. Forms for any other node type are left untouched.
 *
 * @param $form
 *   The form array; $form['#node']->type decides whether the button is added.
 * @param $form_state
 *   Not used here.
 * @param $form_id
 *   Not used here.
 */
function cgrb_data_import_form_node_form_alter(&$form, &$form_state, $form_id) {
  // Only annotation nodes get the extra button.
  if ($form['#node']->type != 'annotation') {
    return;
  }
  $clone_button = array();
  $clone_button['#type'] = 'submit';
  $clone_button['#value'] = 'Clone';
  $clone_button['#name'] = 'op';
  // Route this button to its own submit handler.
  $clone_button['#submit'] = array('clone_annotation');
  $form['actions']['clone'] = $clone_button;
}
/**
 * Submit handler for the "Clone" button on annotation node forms.
 *
 * Redirects to the annotation add form with the id of the node being edited
 * in the 'clone_nid' query parameter. The redirect is given in Drupal's
 * array form (path plus options) so the site base path is prepended
 * automatically and the query string is not encoded as part of the path.
 * No redirect is set when the node has no id yet.
 *
 * @param $form
 *   The submitted form; $form['#node'] is used as the node being cloned,
 *   falling back to menu_get_object() when it is absent.
 * @param $form_state
 *   The form state; its 'redirect' entry is set.
 */
function clone_annotation($form, &$form_state) {
  $node = isset($form['#node']) ? $form['#node'] : menu_get_object();
  if (empty($node->nid)) {
    return;
  }
  $form_state['redirect'] = array(
    'node/add/annotation',
    array('query' => array('clone_nid' => $node->nid)),
  );
}
/************ACCESS FUNCTION*************/
/**
 * Access callback shared by this module's menu items.
 *
 * @return bool
 *   TRUE when the global $user holds at least one of the roles
 *   'administrator', 'Contributors', 'Curators' or 'Moderators';
 *   FALSE otherwise.
 */
function access_function() {
  global $user;
  $allowed_roles = array('administrator', 'Contributors', 'Curators', 'Moderators');
  foreach ($allowed_roles as $allowed_role) {
    if (in_array($allowed_role, $user->roles)) {
      return true;
    }
  }
  return false;
}