Consolidate a table from a large number of XML files into a text file in the Linux terminal

XML file contents:

<ligand>Ligand1.pdbqt</ligand>
<result>
<clustering_histogram>
<cluster cluster_rank="1" lowest_binding_energy="-6.08" run="8" mean_binding_energy="-6.00" num_in_clus="5"/>
<cluster cluster_rank="2" lowest_binding_energy="-6.07" run="43" mean_binding_energy="-5.97" num_in_clus="5"/>
<cluster cluster_rank="3" lowest_binding_energy="-6.06" run="37" mean_binding_energy="-5.96" num_in_clus="11"/>
<cluster cluster_rank="4" lowest_binding_energy="-6.01" run="39" mean_binding_energy="-5.95" num_in_clus="7"/>
<cluster cluster_rank="5" lowest_binding_energy="-6.01" run="49" mean_binding_energy="-5.96" num_in_clus="6"/>
<cluster cluster_rank="6" lowest_binding_energy="-5.99" run="22" mean_binding_energy="-5.91" num_in_clus="2"/>
<cluster cluster_rank="7" lowest_binding_energy="-5.96" run="36" mean_binding_energy="-5.93" num_in_clus="6"/>
<cluster cluster_rank="8" lowest_binding_energy="-5.95" run="42" mean_binding_energy="-5.95" num_in_clus="2"/>
<cluster cluster_rank="9" lowest_binding_energy="-5.95" run="27" mean_binding_energy="-5.92" num_in_clus="3"/>
<cluster cluster_rank="10" lowest_binding_energy="-5.94" run="4" mean_binding_energy="-5.94" num_in_clus="1"/>
<cluster cluster_rank="11" lowest_binding_energy="-5.94" run="3" mean_binding_energy="-5.91" num_in_clus="2"/>
</clustering_histogram>
<rmsd_table>
<run rank="1" sub_rank="1" run="8" binding_energy="-6.08" cluster_rmsd="0.00" reference_rmsd="2.87"/>
<run rank="1" sub_rank="2" run="25" binding_energy="-6.05" cluster_rmsd="0.07" reference_rmsd="2.88"/>
<run rank="1" sub_rank="3" run="10" binding_energy="-5.97" cluster_rmsd="1.19" reference_rmsd="2.58"/>
<run rank="1" sub_rank="4" run="31" binding_energy="-5.97" cluster_rmsd="1.28" reference_rmsd="2.81"/>
<run rank="1" sub_rank="5" run="11" binding_energy="-5.94" cluster_rmsd="1.20" reference_rmsd="2.61"/>
<run rank="2" sub_rank="1" run="43" binding_energy="-6.07" cluster_rmsd="0.00" reference_rmsd="3.75"/>
<run rank="2" sub_rank="2" run="46" binding_energy="-6.00" cluster_rmsd="1.60" reference_rmsd="5.08"/>
<run rank="2" sub_rank="3" run="14" binding_energy="-5.94" cluster_rmsd="1.11" reference_rmsd="4.26"/>
<run rank="2" sub_rank="4" run="24" binding_energy="-5.94" cluster_rmsd="0.21" reference_rmsd="3.66"/>
<run rank="2" sub_rank="5" run="35" binding_energy="-5.92" cluster_rmsd="0.33" reference_rmsd="3.72"/>
<run rank="3" sub_rank="1" run="37" binding_energy="-6.06" cluster_rmsd="0.00" reference_rmsd="3.42"/>
<run rank="3" sub_rank="2" run="48" binding_energy="-6.05" cluster_rmsd="1.53" reference_rmsd="4.55"/>
<run rank="3" sub_rank="3" run="34" binding_energy="-5.99" cluster_rmsd="1.85" reference_rmsd="3.11"/>
<run rank="3" sub_rank="4" run="47" binding_energy="-5.97" cluster_rmsd="1.14" reference_rmsd="3.62"/>
<run rank="3" sub_rank="5" run="18" binding_energy="-5.96" cluster_rmsd="1.84" reference_rmsd="3.14"/>
<run rank="3" sub_rank="6" run="38" binding_energy="-5.94" cluster_rmsd="1.58" reference_rmsd="2.88"/>
<run rank="3" sub_rank="7" run="5" binding_energy="-5.93" cluster_rmsd="1.58" reference_rmsd="2.89"/>
<run rank="3" sub_rank="8" run="41" binding_energy="-5.93" cluster_rmsd="1.61" reference_rmsd="4.62"/>
<run rank="3" sub_rank="9" run="17" binding_energy="-5.93" cluster_rmsd="1.58" reference_rmsd="2.89"/>
<run rank="3" sub_rank="10" run="44" binding_energy="-5.93" cluster_rmsd="1.58" reference_rmsd="2.89"/>
<run rank="3" sub_rank="11" run="2" binding_energy="-5.93" cluster_rmsd="1.82" reference_rmsd="3.90"/>
<run rank="4" sub_rank="1" run="39" binding_energy="-6.01" cluster_rmsd="0.00" reference_rmsd="2.12"/>
<run rank="4" sub_rank="2" run="40" binding_energy="-5.99" cluster_rmsd="1.83" reference_rmsd="2.14"/>
<run rank="4" sub_rank="3" run="1" binding_energy="-5.97" cluster_rmsd="1.80" reference_rmsd="2.55"/>
<run rank="4" sub_rank="4" run="23" binding_energy="-5.95" cluster_rmsd="1.14" reference_rmsd="2.12"/>
<run rank="4" sub_rank="5" run="19" binding_energy="-5.94" cluster_rmsd="1.13" reference_rmsd="2.12"/>
<run rank="4" sub_rank="6" run="45" binding_energy="-5.89" cluster_rmsd="1.98" reference_rmsd="2.06"/>
<run rank="4" sub_rank="7" run="26" binding_energy="-5.87" cluster_rmsd="1.52" reference_rmsd="2.18"/>
<run rank="5" sub_rank="1" run="49" binding_energy="-6.01" cluster_rmsd="0.00" reference_rmsd="3.03"/>
<run rank="5" sub_rank="2" run="13" binding_energy="-5.98" cluster_rmsd="1.21" reference_rmsd="2.94"/>
<run rank="5" sub_rank="3" run="32" binding_energy="-5.98" cluster_rmsd="1.23" reference_rmsd="2.93"/>
<run rank="5" sub_rank="4" run="29" binding_energy="-5.96" cluster_rmsd="1.26" reference_rmsd="2.93"/>
<run rank="5" sub_rank="5" run="16" binding_energy="-5.93" cluster_rmsd="1.79" reference_rmsd="3.00"/>
<run rank="5" sub_rank="6" run="15" binding_energy="-5.90" cluster_rmsd="1.82" reference_rmsd="2.94"/>
<run rank="6" sub_rank="1" run="22" binding_energy="-5.99" cluster_rmsd="0.00" reference_rmsd="2.98"/>
<run rank="6" sub_rank="2" run="6" binding_energy="-5.82" cluster_rmsd="1.76" reference_rmsd="2.66"/>
<run rank="7" sub_rank="1" run="36" binding_energy="-5.96" cluster_rmsd="0.00" reference_rmsd="5.13"/>
<run rank="7" sub_rank="2" run="20" binding_energy="-5.95" cluster_rmsd="0.04" reference_rmsd="5.13"/>
<run rank="7" sub_rank="3" run="7" binding_energy="-5.95" cluster_rmsd="1.67" reference_rmsd="5.23"/>
<run rank="7" sub_rank="4" run="9" binding_energy="-5.93" cluster_rmsd="0.09" reference_rmsd="5.15"/>
<run rank="7" sub_rank="5" run="33" binding_energy="-5.92" cluster_rmsd="0.07" reference_rmsd="5.13"/>
<run rank="7" sub_rank="6" run="21" binding_energy="-5.86" cluster_rmsd="1.22" reference_rmsd="6.15"/>
<run rank="8" sub_rank="1" run="42" binding_energy="-5.95" cluster_rmsd="0.00" reference_rmsd="4.90"/>
<run rank="8" sub_rank="2" run="12" binding_energy="-5.94" cluster_rmsd="1.64" reference_rmsd="4.71"/>
<run rank="9" sub_rank="1" run="27" binding_energy="-5.95" cluster_rmsd="0.00" reference_rmsd="3.45"/>
<run rank="9" sub_rank="2" run="30" binding_energy="-5.91" cluster_rmsd="1.15" reference_rmsd="3.97"/>
<run rank="9" sub_rank="3" run="50" binding_energy="-5.90" cluster_rmsd="1.77" reference_rmsd="2.97"/>
<run rank="10" sub_rank="1" run="4" binding_energy="-5.94" cluster_rmsd="0.00" reference_rmsd="5.49"/>
<run rank="11" sub_rank="1" run="3" binding_energy="-5.94" cluster_rmsd="0.00" reference_rmsd="3.21"/>
<run rank="11" sub_rank="2" run="28" binding_energy="-5.89" cluster_rmsd="1.72" reference_rmsd="3.11"/>
</rmsd_table>
</result>

Goal: A table that looks like the following

Ligand 1
cluster_rank lowest_binding_energy run mean_binding_energy num_in_clus
1 -6.08 8 -6 5
2 -6.07 43 -5.97 5
3 -6.06 37 -5.96 11
4 -6.01 39 -5.95 7
5 -6.01 49 -5.96 6
6 -5.99 22 -5.91 2
7 -5.96 36 -5.93 6
8 -5.95 42 -5.95 2
9 -5.95 27 -5.92 3
10 -5.94 4 -5.94 1
11 -5.94 3 -5.91 2

I've been using the following command but with no success.



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source